In [1]:
rm(list=ls())
setwd("/hpc/group/pbenfeylab/CheWei/")
In [2]:
libs <- c("here", "dplyr", "tradeSeq", "SingleCellExperiment", "slingshot",
           "condiments", "scater", "RColorBrewer", "pheatmap", "cowplot",
          "tidyr","condimentsPaper","Seurat")
suppressMessages(
  suppressWarnings(sapply(libs, require, character.only = TRUE))
)
here
TRUE
dplyr
TRUE
tradeSeq
TRUE
SingleCellExperiment
TRUE
slingshot
TRUE
condiments
TRUE
scater
TRUE
RColorBrewer
TRUE
pheatmap
TRUE
cowplot
TRUE
tidyr
TRUE
condimentsPaper
FALSE
Seurat
TRUE
In [3]:
suppressPackageStartupMessages({
  library(slingshot); library(SingleCellExperiment)
  library(RColorBrewer); library(scales)
  library(viridis); library(UpSetR)
  library(pheatmap); library(msigdbr)
  library(fgsea); library(knitr)
  library(ggplot2); library(gridExtra)
  library(tradeSeq);library(Seurat)
  library(tidyverse);library(condiments)
  library(patchwork);library(ComplexHeatmap)
  library(circlize);library(WGCNA)  
  library(tricycle);library(GeneOverlap)
  library(gprofiler2);library(ggrepel)
})
In [4]:
sessionInfo()
R version 4.2.2 (2022-10-31)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: AlmaLinux 9.3 (Shamrock Pampas Cat)

Matrix products: default
BLAS/LAPACK: /hpc/group/pbenfeylab/ch416/miniconda3/envs/seu4/lib/libopenblasp-r0.3.21.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] grid      stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] ggrepel_0.9.4               gprofiler2_0.2.2           
 [3] GeneOverlap_1.34.0          tricycle_1.6.0             
 [5] WGCNA_1.72-1                fastcluster_1.2.3          
 [7] dynamicTreeCut_1.63-1       circlize_0.4.15            
 [9] ComplexHeatmap_2.14.0       patchwork_1.1.3            
[11] forcats_0.5.2               stringr_1.5.1              
[13] purrr_1.0.2                 readr_2.1.3                
[15] tibble_3.2.1                tidyverse_1.3.2            
[17] gridExtra_2.3               knitr_1.45                 
[19] fgsea_1.24.0                msigdbr_7.5.1              
[21] UpSetR_1.4.0                viridis_0.6.4              
[23] viridisLite_0.4.2           scales_1.2.1               
[25] SeuratObject_4.1.3          Seurat_4.1.1.9001          
[27] tidyr_1.3.0                 cowplot_1.1.1              
[29] pheatmap_1.0.12             RColorBrewer_1.1-3         
[31] scater_1.26.1               ggplot2_3.4.4              
[33] scuttle_1.8.0               condiments_1.6.0           
[35] slingshot_2.6.0             TrajectoryUtils_1.6.0      
[37] princurve_2.1.6             SingleCellExperiment_1.20.0
[39] SummarizedExperiment_1.28.0 Biobase_2.58.0             
[41] GenomicRanges_1.50.0        GenomeInfoDb_1.34.8        
[43] IRanges_2.32.0              S4Vectors_0.36.0           
[45] BiocGenerics_0.44.0         MatrixGenerics_1.10.0      
[47] matrixStats_1.1.0           tradeSeq_1.12.0            
[49] dplyr_1.1.3                 here_1.0.1                 

loaded via a namespace (and not attached):
  [1] rsvd_1.0.5                Hmisc_5.1-1              
  [3] ica_1.0-3                 class_7.3-22             
  [5] foreach_1.5.2             lmtest_0.9-40            
  [7] rprojroot_2.0.4           crayon_1.5.2             
  [9] MASS_7.3-58.3             distinct_1.10.2          
 [11] nlme_3.1-162              backports_1.4.1          
 [13] reprex_2.0.2              transport_0.13-0         
 [15] impute_1.72.0             rlang_1.1.2              
 [17] XVector_0.38.0            caret_6.0-94             
 [19] ROCR_1.0-11               readxl_1.4.1             
 [21] irlba_2.3.5.1             limma_3.54.0             
 [23] BiocParallel_1.32.5       rjson_0.2.21             
 [25] bit64_4.0.5               glue_1.6.2               
 [27] rngtools_1.5.2            sctransform_0.4.1        
 [29] parallel_4.2.2            vipor_0.4.5              
 [31] spatstat.sparse_3.0-3     AnnotationDbi_1.60.0     
 [33] spatstat.geom_3.2-7       haven_2.5.1              
 [35] tidyselect_1.2.0          fitdistrplus_1.1-11      
 [37] zoo_1.8-12                xtable_1.8-4             
 [39] RcppHNSW_0.5.0            magrittr_2.0.3           
 [41] evaluate_0.23             cli_3.6.1                
 [43] zlibbioc_1.44.0           rstudioapi_0.15.0        
 [45] doRNG_1.8.6               miniUI_0.1.1.1           
 [47] sp_2.1-1                  spatstat.linnet_3.1-0    
 [49] rpart_4.1.19              fastmatch_1.1-3          
 [51] fastDummies_1.7.3         shiny_1.7.5.1            
 [53] BiocSingular_1.14.0       xfun_0.41                
 [55] spatstat.model_3.2-3      clue_0.3-64              
 [57] cluster_2.1.4             caTools_1.18.2           
 [59] pbdZMQ_0.3-8              KEGGREST_1.38.0          
 [61] listenv_0.9.0             Biostrings_2.66.0        
 [63] png_0.1-8                 future_1.33.0            
 [65] ipred_0.9-14              withr_2.5.2              
 [67] bitops_1.0-7              plyr_1.8.9               
 [69] cellranger_1.1.0          hardhat_1.3.0            
 [71] e1071_1.7-13              pROC_1.18.0              
 [73] pillar_1.9.0              gplots_3.1.3             
 [75] GlobalOptions_0.1.2       cachem_1.0.8             
 [77] Ecume_0.9.1               fs_1.6.3                 
 [79] kernlab_0.9-32            GetoptLong_1.0.5         
 [81] DelayedMatrixStats_1.20.0 vctrs_0.6.4              
 [83] ellipsis_0.3.2            generics_0.1.3           
 [85] lava_1.7.2.1              tools_4.2.2              
 [87] foreign_0.8-85            beeswarm_0.4.0           
 [89] munsell_0.5.0             proxy_0.4-27             
 [91] DelayedArray_0.24.0       fastmap_1.1.1            
 [93] compiler_4.2.2            abind_1.4-5              
 [95] httpuv_1.6.12             plotly_4.10.3            
 [97] GenomeInfoDbData_1.2.9    prodlim_2023.03.31       
 [99] edgeR_3.40.1              ggnewscale_0.4.8         
[101] lattice_0.21-8            deldir_1.0-9             
[103] utf8_1.2.4                later_1.3.1              
[105] recipes_1.0.6             jsonlite_1.8.7           
[107] ScaledMatrix_1.6.0        pbapply_1.7-2            
[109] sparseMatrixStats_1.10.0  lazyeval_0.2.2           
[111] promises_1.2.1            spatstat_3.0-5           
[113] doParallel_1.0.17         goftest_1.2-3            
[115] checkmate_2.3.0           spatstat.utils_3.0-4     
[117] reticulate_1.34.0         rmarkdown_2.25           
[119] Rtsne_0.16                uwot_0.1.16              
[121] igraph_1.5.1              survival_3.4-0           
[123] htmltools_0.5.7           memoise_2.0.1            
[125] locfit_1.5-9.6            digest_0.6.33            
[127] assertthat_0.2.1          mime_0.12                
[129] repr_1.1.4                RSQLite_2.3.1            
[131] future.apply_1.11.0       data.table_1.14.8        
[133] blob_1.2.4                preprocessCore_1.60.2    
[135] Formula_1.2-5             splines_4.2.2            
[137] googledrive_2.0.0         RCurl_1.98-1.6           
[139] broom_1.0.2               hms_1.1.2                
[141] modelr_0.1.10             colorspace_2.1-0         
[143] base64enc_0.1-3           ggbeeswarm_0.7.1         
[145] shape_1.4.6               nnet_7.3-19              
[147] Rcpp_1.0.11               RANN_2.6.1               
[149] mvtnorm_1.1-3             fansi_1.0.5              
[151] tzdb_0.3.0                parallelly_1.36.0        
[153] IRdisplay_1.1             ModelMetrics_1.2.2.2     
[155] R6_2.5.1                  ggridges_0.5.4           
[157] lifecycle_1.0.4           googlesheets4_1.0.1      
[159] leiden_0.4.3              Matrix_1.5-4             
[161] RcppAnnoy_0.0.21          iterators_1.0.14         
[163] spatstat.explore_3.2-5    gower_1.0.1              
[165] htmlwidgets_1.6.2         beachmat_2.14.0          
[167] polyclip_1.10-6           timechange_0.1.1         
[169] circular_0.4-95           rvest_1.0.3              
[171] mgcv_1.8-42               globals_0.16.2           
[173] htmlTable_2.4.2           spatstat.random_3.2-1    
[175] progressr_0.14.0          codetools_0.2-19         
[177] lubridate_1.9.0           GO.db_3.16.0             
[179] gtools_3.9.4              dbplyr_2.2.1             
[181] RSpectra_0.16-1           gtable_0.3.4             
[183] DBI_1.1.3                 tensor_1.5               
[185] httr_1.4.7                KernSmooth_2.23-20       
[187] stringi_1.8.1             reshape2_1.4.4           
[189] uuid_1.1-0                timeDate_4022.108        
[191] xml2_1.3.3                boot_1.3-28.1            
[193] IRkernel_1.3.1.9000       BiocNeighbors_1.16.0     
[195] scattermore_1.2           bit_4.0.5                
[197] spatstat.data_3.0-3       pkgconfig_2.0.3          
[199] babelgene_22.9            gargle_1.2.1             
In [5]:
rc.integrated <- readRDS("./tricycle/phloem_atlas_seu4_simplified.rds")
In [6]:
head(rc.integrated@meta.data)
A data.frame: 6 x 179
orig.identnCount_RNAnFeature_RNAcell_idsamplebarcodetotal_countsdetected_genesclusterannotation...prediction.score.T40prediction.score.T28prediction.score.T11prediction.score.T39prediction.score.T45prediction.score.T31prediction.score.T20consensus.time.group.50time.anno.Li.crudecelltype.anno.Li.crude
<fct><dbl><int><chr><chr><chr><int><int><int><chr>...<dbl><dbl><dbl><dbl><dbl><dbl><dbl><chr><chr><chr>
APL_AAACCCAAGTAACGTA-1APL353835176APL_AAACCCAAGTAACGTA-1APLAAACCCAAGTAACGTA-13560052109early/undetermined...0.0000000000.000000000.000000000.00000000.0000000000.01163433T0 Proliferation DomainEndodermis
APL_AAACCCACAACCAACT-1APL 64563020APL_AAACCCACAACCAACT-1APLAAACCCACAACCAACT-1 661630582PSE ...0.0000000000.000000000.040523170.00000000.0000000000.00000000T23Proliferation DomainPhloem
APL_AAACCCACAGTTGAAA-1APL127313650APL_AAACCCACAGTTGAAA-1APLAAACCCACAGTTGAAA-11278136691MSE ...0.0000000000.000000000.000000000.00000000.0030228120.02580615T21Proliferation DomainPhloem
APL_AAACCCAGTGCCTTTC-1APL 69543072APL_AAACCCAGTGCCTTTC-1APLAAACCCAGTGCCTTTC-1 713831004PPP ...0.0000000000.000000000.015078380.00000000.0000000000.14251363T18Elongation Pericycle
APL_AAACCCAGTTTCGACA-1APL 72762483APL_AAACCCAGTTTCGACA-1APLAAACCCAGTTTCGACA-1 736425296PSE ...0.0049084780.000000000.000000000.13990400.0053917600.00000000T33Maturation Phloem
APL_AAACCCATCTGCGAGC-1APL 70452849APL_AAACCCATCTGCGAGC-1APLAAACCCATCTGCGAGC-1 711428693CC ...0.0000000000.082762030.000000000.00000000.0074523010.00000000T27Proliferation DomainPhloem
In [7]:
table(rc.integrated$orig.ident)
      APL     MAKR5 MAKR5diff   PEARdel       S17      sAPL 
     5360       542      2022       649      1461       170 
In [8]:
DimPlot(rc.integrated, reduction = "umap", group.by = "annotation")
In [9]:
DimPlot(rc.integrated, reduction = "umap", group.by = "celltype.anno.Li.crude", label=TRUE)
In [10]:
DimPlot(rc.integrated, reduction = "umap", group.by = "time.anno.Li.crude", label=TRUE)
In [11]:
rc.integrated
An object of class Seurat 
17396 features across 10204 samples within 1 assay 
Active assay: RNA (17396 features, 17396 variable features)
 2 dimensional reductions calculated: pca, umap
In [12]:
rc.integrated <- subset(rc.integrated, cells=colnames(rc.integrated)[which(rc.integrated$annotation=="dividing" | rc.integrated$annotation=="early PSE" | rc.integrated$annotation=="early/undetermined"| rc.integrated$annotation=="PSE"| rc.integrated$annotation=="MSE" | rc.integrated$annotation=="CC")])
In [13]:
rc.integrated <- subset(rc.integrated, cells=colnames(rc.integrated)[which(rc.integrated$time.anno.Li.crude=="Proliferation Domain")])
In [14]:
rc.integrated
An object of class Seurat 
17396 features across 3645 samples within 1 assay 
Active assay: RNA (17396 features, 17396 variable features)
 2 dimensional reductions calculated: pca, umap
In [15]:
DimPlot(rc.integrated, reduction = "umap", group.by = "annotation")
In [16]:
wanted_cols <- c("orig.ident", "annotation", "tricyclePosition","tricycleCCStage")
rc.integrated@meta.data <- rc.integrated@meta.data[,wanted_cols]
colnames(rc.integrated@meta.data) <- c("sample", "annotation","tricyclePosition","tricycleCCStage")
In [17]:
table(rc.integrated$sample)
      APL     MAKR5 MAKR5diff   PEARdel       S17      sAPL 
     1244       389      1563       357        43        49 
In [18]:
ccgl <- read.csv("./tradeseq/245_cell_cycle_related_genes_for_reference.csv")
ccgl <- ccgl$GeneID
In [19]:
options(repr.plot.width=6, repr.plot.height=6)
DimPlot(rc.integrated, reduction = "umap", group.by = "tricycleCCStage", label = FALSE, pt.size=2)
In [20]:
table(rc.integrated$tricycleCCStage)
G1/G0   G2M     S 
 1177  1129  1339 
In [21]:
options(repr.plot.width=6, repr.plot.height=6)
DimPlot(rc.integrated, reduction = "pca", group.by = "tricycleCCStage", label = FALSE, pt.size=2)
In [37]:
rc.integrated <- RunPCA(rc.integrated,features = ccgl)
rc.integrated <- FindNeighbors(rc.integrated, dims = 1:10)
rc.integrated <- RunUMAP(rc.integrated, dims = 1:10)
Warning message in PrepDR(object = object, features = features, verbose = verbose):
“The following 3 features requested have not been scaled (running reduction without them): AT5G47500, AT1G33670, AT5G05900”
PC_ 1 
Positive:  AT5G55520, AT4G05190, AT1G16630, AT5G51600, AT4G14330, AT1G76310, AT4G26660, AT1G02730, AT4G01730, AT4G38062 
	   AT3G11520, AT1G03780, AT3G56100, AT3G15550, AT4G09060, AT1G20610, AT2G22610, AT5G60930, AT3G44050, AT1G18370 
	   AT3G23670, AT5G45700, AT4G35620, AT4G37490, AT3G55660, AT5G15510, AT5G56120, AT3G58650, AT3G26050, AT1G34355 
Negative:  AT3G12170, AT5G52220, AT2G20980, AT3G48490, AT1G80190, AT2G44580, AT4G00020, AT2G29680, AT3G23740, AT3G52115 
	   AT4G30860, AT4G31400, AT2G24970, AT1G75150, AT1G26330, AT3G13060, AT4G12620, AT2G37560, AT3G24495, AT1G20720 
	   AT3G27640, AT4G21070, AT3G62110, AT3G05740, AT3G59550, AT3G25100, AT3G18730, AT2G42190, AT1G07270, AT5G46740 
PC_ 2 
Positive:  AT1G07020, AT3G62110, AT5G56120, AT3G13060, AT5G15510, AT2G42190, AT4G02800, AT4G35620, AT3G03810, AT3G51230 
	   AT4G34960, AT4G17000, AT5G13840, AT3G15550, AT1G02730, AT1G03780, AT3G11520, AT1G11220, AT4G38230, AT3G55660 
	   AT3G58100, AT5G01420, AT4G26660, AT5G48310, AT1G05520, AT4G19185, AT5G45700, AT1G76310, AT5G44040, AT1G18370 
Negative:  AT3G12170, AT4G30860, AT4G00020, AT2G20980, AT2G44580, AT5G43080, AT1G69770, AT1G29418, AT3G48490, AT5G46740 
	   AT1G26330, AT3G24495, AT5G52220, AT1G80190, AT3G48160, AT3G55340, AT1G75150, AT1G77630, AT3G27640, AT3G52115 
	   AT3G23740, AT3G59550, AT4G31400, AT1G20720, AT5G02820, AT4G12620, AT5G13830, AT3G18730, AT4G21070, AT2G37560 
PC_ 3 
Positive:  AT5G56120, AT1G63100, AT3G54710, AT4G02800, AT3G51230, AT5G15510, AT5G44040, AT1G04425, AT3G55660, AT1G03780 
	   AT5G48310, AT1G18370, AT3G15550, AT1G23790, AT5G01420, AT5G13840, AT1G75640, AT4G17000, AT4G35620, AT1G11220 
	   AT1G02730, AT3G11520, AT5G45700, AT4G26660, AT3G12170, AT2G44190, AT3G48490, AT5G55830, AT2G44580, AT4G37490 
Negative:  AT1G72670, AT1G44110, AT1G10780, AT1G53140, AT1G76740, AT2G47920, AT2G36200, AT4G21270, AT5G55820, AT4G18570 
	   AT1G61450, AT3G19050, AT4G11080, AT4G13370, AT5G25380, AT1G49910, AT2G42290, AT4G21820, AT4G17240, AT5G60150 
	   AT5G60930, AT4G39630, AT1G59540, AT5G11510, AT4G12540, AT2G32590, AT5G07810, AT5G37630, AT3G58650, AT4G14150 
PC_ 4 
Positive:  AT3G55340, AT3G06840, AT1G04425, AT5G13830, AT4G34360, AT1G77550, AT3G54710, AT1G29418, AT3G13150, AT5G42700 
	   AT1G30960, AT4G15850, AT1G77630, AT1G62150, AT1G63100, AT1G75640, AT1G53542, AT2G40570, AT2G17670, AT5G12440 
	   AT4G16700, AT5G59240, AT1G09700, AT4G12740, AT5G09380, AT2G04530, AT3G46020, AT4G28020, AT1G73180, AT1G19520 
Negative:  AT3G52115, AT3G27640, AT5G43080, AT3G48490, AT4G30860, AT3G25100, AT4G21070, AT2G29680, AT5G46740, AT1G75150 
	   AT4G00020, AT2G42260, AT3G48160, AT2G20980, AT1G76740, AT4G31400, AT3G24495, AT5G20850, AT5G25380, AT1G05440 
	   AT3G12170, AT1G07270, AT4G01730, AT2G24970, AT4G35620, AT1G20610, AT5G23910, AT1G80190, AT3G18680, AT1G20720 
PC_ 5 
Positive:  AT4G22970, AT5G37630, AT1G69770, AT3G54750, AT4G15890, AT1G29418, AT5G07810, AT3G13060, AT5G63920, AT1G50240 
	   AT3G60740, AT2G32590, AT1G05520, AT1G20410, AT3G19050, AT1G72480, AT1G75640, AT1G63100, AT4G12540, AT2G44190 
	   AT4G01810, AT5G63120, AT1G18370, AT3G24495, AT5G51560, AT3G13150, AT5G13840, AT2G33610, AT5G47400, AT1G09280 
Negative:  AT3G46020, AT3G06840, AT4G34360, AT5G13830, AT3G12170, AT1G61450, AT5G14660, AT3G48490, AT5G52220, AT2G47920 
	   AT5G42700, AT2G34450, AT2G24970, AT1G53542, AT5G06590, AT4G39630, AT5G63690, AT5G09500, AT4G04190, AT5G12360 
	   AT4G11080, AT2G44580, AT1G80190, AT1G44110, AT2G20980, AT5G45700, AT4G05190, AT4G01730, AT4G09060, AT3G26050 

Computing nearest neighbor graph

Computing SNN

Warning message:
“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
14:33:22 UMAP embedding parameters a = 0.9922 b = 1.112

14:33:22 Read 3645 rows and found 10 numeric columns

14:33:22 Using Annoy for neighbor search, n_neighbors = 30

14:33:22 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

14:33:23 Writing NN index file to temp file /tmp/Rtmp21YxYc/file3782c157914d8

14:33:23 Searching Annoy index using 1 thread, search_k = 3000

14:33:24 Annoy recall = 100%

14:33:25 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

14:33:28 Initializing from normalized Laplacian + noise (using RSpectra)

14:33:29 Commencing optimization for 500 epochs, with 146446 positive edges

14:33:34 Optimization finished

In [41]:
#rc.integrated[["umap"]]@cell.embeddings[,1] <- rc.integrated[["umap"]]@cell.embeddings[,1]*-1
rc.integrated[["umap"]]@cell.embeddings[,2] <- rc.integrated[["umap"]]@cell.embeddings[,2]*-1
#u2 <- rc.integrated@reductions$umap@cell.embeddings[,1]
#u1 <- rc.integrated@reductions$umap@cell.embeddings[,2]
#rc.integrated@reductions$umap@cell.embeddings[,1] <- u1
#rc.integrated@reductions$umap@cell.embeddings[,2] <- u2
In [42]:
options(repr.plot.width=6, repr.plot.height=6)
DimPlot(rc.integrated, reduction = "umap", group.by = "tricycleCCStage", label = FALSE, pt.size=2)
In [43]:
grDevices::cairo_pdf("./pdfs/Phloem_atlas_Proliferation_Domain_cells_UMAP_with_245_CCgenes.pdf",width=6, height=6)
print(DimPlot(rc.integrated, reduction = "umap", group.by = "tricycleCCStage", label = FALSE, pt.size=2))
dev.off()
png: 2
In [44]:
sce <- as.SingleCellExperiment(rc.integrated)
In [45]:
sce$tricyclePosition <- rc.integrated$tricyclePosition*pi
In [46]:
options(repr.plot.width=9, repr.plot.height=6)
## Tricycle position on umap
p <- plot_emb_circle_scale(sce, dimred = "UMAP",
                       point.size = 5, point.alpha = 1) +
  theme_void(base_size = 14)
legend <- circle_scale_legend(text.size = 5, alpha = 1)
print(plot_grid(p, legend, ncol = 2, rel_widths = c(1, 0.7)))
In [47]:
grDevices::cairo_pdf("./pdfs/Phloem_atlas_Proliferation_Domain_cells_UMAP_tricyclePosition_with_245_CCgenes.pdf",width=9, height=6)
print(plot_grid(p, legend, ncol = 2, rel_widths = c(1, 0.7)))
dev.off()
png: 2
In [48]:
options(repr.plot.width=6, repr.plot.height=6)
DimPlot(rc.integrated, reduction = "pca", group.by = "tricycleCCStage", label = FALSE, pt.size=2)

Inspect gene expression over tricycle position¶

In [49]:
pltsg <- function(gene){
    keygene.idx <- which(rownames(rc.integrated@assays$RNA@data) == gene)
    fit.l <- fit_periodic_loess(rc.integrated$tricyclePosition*pi,
                            rc.integrated@assays$RNA@data[keygene.idx,],
                            plot = FALSE, span=0.3,
                       x_lab = "Cell cycle position \u03b8", y_lab = "log2(keygene)")
    fit <- fit.l$pred.df
    options(repr.plot.width=8, repr.plot.height=6)
    print(ggplot(fit, aes(x=x,y=y)) + geom_path(size = 1, alpha = 1)+ labs(x = "θ", y = "smoothed gene expression", title = gene) + 
                scale_x_continuous(limits = c(0, 2 * pi), breaks = c(0, 
                    pi/2, pi, 3 * pi/2, 2 * pi), labels = paste0(c(0, 
                    0.5, 1, 1.5, 2), "π"), name = "θ")+
      #              scale_x_continuous(limits = c(0, 200), breaks = c(0, 
      #              50, 100, 150, 200), labels = paste0(c(0, 
      #              0.5, 1, 1.5, 2), "π"), name = "θ")+
      theme_bw(base_size = 14)+
      theme(legend.title=element_blank()))
}
In [50]:
## DWF4
grDevices::cairo_pdf("Phloem_atlas_DWF4.pdf",width=8, height=6)
pltsg('AT3G50660')
dev.off()

pltsg('AT3G50660')
Warning message:
“Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.”
png: 2
In [51]:
## OPS
grDevices::cairo_pdf("Phloem_atlas_OPS.pdf",width=8, height=6)
pltsg('AT3G09070')
dev.off()

pltsg('AT3G09070')
png: 2
In [52]:
## OPL2
grDevices::cairo_pdf("Phloem_atlas_OPL2.pdf",width=8, height=6)
pltsg('AT2G38070')
dev.off()

pltsg('AT2G38070')
png: 2
In [53]:
## G1 marker reported by Laura Lee
pltsg('AT5G21940')
In [54]:
## S marker reported by Laura Lee
pltsg('AT5G10390')
In [55]:
## G2M marker reported by Laura Lee
pltsg('AT4G23800')

Load gene lists¶

In [22]:
DefaultAssay(rc.integrated) <- "RNA"
In [23]:
nrow(rc.integrated)
17396
In [24]:
GL <- read.csv("./tradeseq/BES1_BZR1_2X_ChIP_targets.csv")
GL <- intersect(GL$GeneID,rownames(rc.integrated))
GL2 <- read.csv("./tradeseq/BES1_BZR1_target_BL2hr_vs_BRZ.csv")
GL2up <- GL2 %>% filter(BL2hr_vs_BRZ=="BES1_Target_BL_up")
GL2up <- intersect(GL2up$GeneID, rownames(rc.integrated))
GL2down <- GL2 %>% filter(BL2hr_vs_BRZ=="BES1_Target_BL_down")
GL2down <- intersect(GL2down$GeneID, rownames(rc.integrated))
GL2mixed <- GL2 %>% filter(BL2hr_vs_BRZ=="BES1_Target_BL_mixed")
GL2mixed <- intersect(GL2mixed$GeneID, rownames(rc.integrated))
GL3 <- read.csv("./tradeseq/MYB3R4_Targets.csv")
GL3 <- intersect(GL3$GeneID, rownames(rc.integrated))
In [25]:
DE <- read.csv("./tradeseq/v4_BL2hr_v_BRZ_cell_time_EdgeR_q0.05_FC1.5_r_v_4_20220112.csv", row.names = 1)
DE <- DE %>% filter(cluster_id == "Proliferation Domain_Atrichoblast")
head(DE)
A data.frame: 6 x 28
genecluster_idsc_2.cpmsc_43.cpmsc_50.cpmsc_5.cpmsc_46.cpmsc_49.cpmsc_2.frqsc_43.frq...Fp_valp_adj.locp_adj.glbcontrastNameTF_NameDescriptionup_dn_labelclust_up_dn
<chr><chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>...<dbl><dbl><dbl><dbl><chr><chr><chr><chr><chr><chr>
1AT3G50660Proliferation Domain_Atrichoblast 66.5 53.90 58.10 3.04 1.22 1.310.77800.6530...6031.14e-262.56e-227.02e-21BL-BRZCYP90B1 NA NA DownProliferation Domain_Atrichoblast_Down
2AT4G36780Proliferation Domain_Atrichoblast227.0167.00196.00 17.90 12.50 12.700.94900.8930...5516.89e-267.70e-224.22e-20BL-BRZBEH2 BEH2BES1/BZR1 homolog 2DownProliferation Domain_Atrichoblast_Down
3AT4G01630Proliferation Domain_Atrichoblast 12.0 6.87 11.50209.00142.00142.000.25300.1480...5083.38e-252.52e-212.07e-19BL-BRZEXPA17 NA NA Up Proliferation Domain_Atrichoblast_Up
4AT2G43890Proliferation Domain_Atrichoblast 2.4 1.77 2.28110.00 31.40 41.300.06060.0473...4542.98e-241.67e-201.83e-18BL-BRZAT2G43890NA NA Up Proliferation Domain_Atrichoblast_Up
5AT4G16780Proliferation Domain_Atrichoblast 54.0 57.00 56.20 3.99 4.29 5.760.68700.7080...4062.55e-231.14e-191.56e-17BL-BRZHAT4 HB-2homeobox protein 2 DownProliferation Domain_Atrichoblast_Down
6AT1G07985Proliferation Domain_Atrichoblast162.0176.00228.00 23.00 21.80 16.400.89900.7460...3632.25e-228.40e-191.38e-16BL-BRZAT1G07985NA NA DownProliferation Domain_Atrichoblast_Down

Prepare data for WCGNA¶

In [60]:
rc.integrated
An object of class Seurat 
17396 features across 3645 samples within 1 assay 
Active assay: RNA (17396 features, 17396 variable features)
 2 dimensional reductions calculated: pca, umap
In [61]:
table(rc.integrated$sample)
      APL     MAKR5 MAKR5diff   PEARdel       S17      sAPL 
     1244       389      1563       357        43        49 
In [62]:
y.list <- c()
for (gene in rownames(rc.integrated)){
keygene.idx <- which(rownames(rc.integrated) == gene)
fit.l <- fit_periodic_loess(rc.integrated$tricyclePosition*pi,
                            rc.integrated@assays$RNA@data[keygene.idx,],
                            plot = FALSE, span=0.3,
                       x_lab = "Cell cycle position \u03b8", y_lab = "log2(keygene)")
fit <- fit.l$pred.df

y.list <- cbind(y.list,fit$y)
}
In [63]:
colnames(y.list) <- rownames(rc.integrated)
In [64]:
write.csv(y.list, "./tradeseq/All_genes_Phloem_atlas_cells_tricycle.csv")
In [65]:
y.list <- read.csv("./tradeseq/All_genes_Phloem_atlas_cells_tricycle.csv", row.names=1)
y.list <- as.matrix(y.list)
In [66]:
head(y.list)
A matrix: 6 x 17396 of type dbl
AT1G01010AT1G01020AT1G01030AT1G01040AT1G01050AT1G01060AT1G01070AT1G01080AT1G01090AT1G01100...AT5G67540AT5G67560AT5G67570AT5G67580AT5G67590AT5G67600AT5G67610AT5G67620AT5G67630AT5G67640
10.0028516380.26837340.0036602500.049523321.1435980.0033883420.0038085420.0082762800.58279333.244628...0.094877810.33552830.054385830.060663940.97696791.4778550.065910790.00044775560.16592550.07384151
20.0027953920.26864080.0036106210.049775691.1510580.0033669450.0038947080.0081810320.58207643.241383...0.096003740.33797920.054492680.061670510.98059641.4879690.065392670.00047342490.16298900.07338066
30.0027406280.26893240.0035599510.050046241.1585250.0033515050.0039804970.0080886920.58135353.238083...0.097148550.34046880.054615080.062717700.98419491.4982290.064884930.00050013820.16005460.07292797
40.0026873710.26924760.0035083130.050334161.1659930.0033418620.0040657930.0079993230.58062563.234734...0.098310480.34299350.054752360.063803370.98776111.5086210.064387690.00052784720.15712500.07248405
50.0026356420.26958570.0034557830.050638631.1734520.0033378580.0041504780.0079129870.57989393.231341...0.099487770.34554970.054903870.064925410.99129261.5191310.063901050.00055650330.15420290.07204952
60.0025854660.26994580.0034024340.050958851.1808960.0033393330.0042344350.0078297460.57915953.227909...0.100678660.34813360.055068950.066081670.99478721.5297470.063425110.00058605840.15129110.07162501
In [67]:
nrow(y.list)
200
In [68]:
## Remove zeros in y.list()
y.list <- y.list[,-as.numeric(which(colSums(y.list)==0))]
In [69]:
ncol(y.list)
17386

WGCNA¶

In [70]:
allowWGCNAThreads()
Allowing multi-threading with up to 94 threads.
In [71]:
# Choose a set of soft-thresholding powers
powers = c(c(1:100))
In [72]:
# Call the network topology analysis function
sft = pickSoftThreshold(
  y.list,             # <= Input data
  #blockSize = 30,
  powerVector = powers,
  verbose = 5
  )
pickSoftThreshold: will use block size 2573.
 pickSoftThreshold: calculating connectivity for given powers...
   ..working on genes 1 through 2573 of 17386
   ..working on genes 2574 through 5146 of 17386
   ..working on genes 5147 through 7719 of 17386
   ..working on genes 7720 through 10292 of 17386
   ..working on genes 10293 through 12865 of 17386
   ..working on genes 12866 through 15438 of 17386
   ..working on genes 15439 through 17386 of 17386
    Power SFT.R.sq     slope truncated.R.sq mean.k. median.k. max.k.
1       1 8.96e-01  4.250000         0.9000   10900     11900  13100
2       2 8.51e-01  2.070000         0.8600    8360      9370  11200
3       3 7.54e-01  1.260000         0.7090    6880      7750  10000
4       4 6.52e-01  0.732000         0.5550    5890      6600   9140
5       5 5.30e-01  0.486000         0.4280    5170      5720   8450
6       6 3.67e-01  0.318000         0.2970    4620      5040   7880
7       7 2.14e-01  0.208000         0.1800    4170      4480   7400
8       8 9.17e-02  0.121000         0.1200    3810      4030   6990
9       9 2.33e-02  0.055500         0.0888    3510      3650   6630
10     10 2.92e-07  0.000188         0.0838    3250      3330   6310
11     11 1.85e-02 -0.046300         0.1280    3030      3050   6030
12     12 6.97e-02 -0.088900         0.1980    2840      2810   5770
13     13 1.35e-01 -0.124000         0.2710    2670      2600   5540
14     14 2.08e-01 -0.155000         0.3450    2520      2410   5330
15     15 2.78e-01 -0.180000         0.4040    2380      2250   5130
16     16 3.60e-01 -0.209000         0.4890    2260      2110   4950
17     17 4.24e-01 -0.232000         0.5500    2150      1980   4790
18     18 4.90e-01 -0.255000         0.6100    2050      1860   4630
19     19 5.46e-01 -0.276000         0.6610    1960      1760   4490
20     20 5.91e-01 -0.294000         0.6970    1870      1660   4350
21     21 6.34e-01 -0.311000         0.7400    1790      1580   4220
22     22 6.75e-01 -0.327000         0.7790    1720      1500   4100
23     23 7.05e-01 -0.343000         0.8090    1660      1430   3990
24     24 7.34e-01 -0.357000         0.8340    1600      1360   3890
25     25 7.58e-01 -0.370000         0.8540    1540      1300   3780
26     26 7.77e-01 -0.382000         0.8660    1480      1240   3690
27     27 7.99e-01 -0.394000         0.8800    1430      1190   3600
28     28 8.14e-01 -0.404000         0.8910    1390      1140   3510
29     29 8.25e-01 -0.415000         0.8990    1340      1090   3430
30     30 8.39e-01 -0.425000         0.9060    1300      1050   3350
31     31 8.51e-01 -0.434000         0.9160    1260      1010   3280
32     32 8.63e-01 -0.443000         0.9230    1220       974   3210
33     33 8.70e-01 -0.452000         0.9270    1190       940   3140
34     34 8.82e-01 -0.460000         0.9340    1160       907   3080
35     35 8.91e-01 -0.468000         0.9410    1120       876   3010
36     36 8.96e-01 -0.475000         0.9430    1090       846   2950
37     37 8.99e-01 -0.483000         0.9450    1070       819   2900
38     38 9.04e-01 -0.490000         0.9480    1040       793   2840
39     39 9.08e-01 -0.497000         0.9510    1010       769   2790
40     40 9.13e-01 -0.503000         0.9530     988       746   2740
41     41 9.16e-01 -0.509000         0.9560     965       724   2690
42     42 9.21e-01 -0.516000         0.9590     942       702   2640
43     43 9.24e-01 -0.521000         0.9610     920       682   2590
44     44 9.26e-01 -0.526000         0.9620     899       663   2550
45     45 9.30e-01 -0.532000         0.9630     879       644   2500
46     46 9.33e-01 -0.537000         0.9650     860       627   2460
47     47 9.35e-01 -0.541000         0.9680     842       611   2420
48     48 9.38e-01 -0.545000         0.9710     824       596   2380
49     49 9.41e-01 -0.550000         0.9730     807       581   2350
50     50 9.42e-01 -0.554000         0.9750     791       566   2310
51     51 9.44e-01 -0.560000         0.9740     775       550   2270
52     52 9.45e-01 -0.565000         0.9740     760       537   2240
53     53 9.47e-01 -0.569000         0.9750     745       523   2210
54     54 9.48e-01 -0.573000         0.9760     731       511   2180
55     55 9.49e-01 -0.577000         0.9760     717       499   2140
56     56 9.51e-01 -0.580000         0.9760     704       488   2110
57     57 9.53e-01 -0.584000         0.9760     691       477   2080
58     58 9.55e-01 -0.587000         0.9770     679       466   2060
59     59 9.56e-01 -0.591000         0.9760     667       456   2030
60     60 9.57e-01 -0.595000         0.9760     656       446   2000
61     61 9.57e-01 -0.598000         0.9760     644       436   1970
62     62 9.57e-01 -0.601000         0.9760     633       427   1950
63     63 9.58e-01 -0.605000         0.9760     623       418   1920
64     64 9.58e-01 -0.609000         0.9750     613       409   1900
65     65 9.58e-01 -0.612000         0.9740     603       401   1880
66     66 9.59e-01 -0.616000         0.9730     593       393   1850
67     67 9.60e-01 -0.618000         0.9750     584       386   1830
68     68 9.62e-01 -0.621000         0.9760     575       379   1810
69     69 9.62e-01 -0.624000         0.9750     566       371   1790
70     70 9.63e-01 -0.627000         0.9760     557       364   1760
71     71 9.63e-01 -0.629000         0.9750     549       358   1740
72     72 9.64e-01 -0.632000         0.9750     541       351   1720
73     73 9.64e-01 -0.634000         0.9750     533       345   1700
74     74 9.64e-01 -0.637000         0.9750     525       338   1680
75     75 9.64e-01 -0.640000         0.9740     517       333   1670
76     76 9.64e-01 -0.643000         0.9740     510       327   1650
77     77 9.65e-01 -0.646000         0.9740     503       321   1630
78     78 9.65e-01 -0.648000         0.9720     496       316   1610
79     79 9.65e-01 -0.651000         0.9730     489       310   1590
80     80 9.65e-01 -0.653000         0.9730     482       304   1580
81     81 9.65e-01 -0.655000         0.9730     476       299   1560
82     82 9.67e-01 -0.658000         0.9740     470       294   1540
83     83 9.67e-01 -0.659000         0.9750     464       289   1530
84     84 9.67e-01 -0.661000         0.9740     457       284   1510
85     85 9.67e-01 -0.663000         0.9740     452       280   1500
86     86 9.67e-01 -0.665000         0.9740     446       276   1480
87     87 9.67e-01 -0.668000         0.9730     440       271   1470
88     88 9.67e-01 -0.670000         0.9720     435       267   1450
89     89 9.67e-01 -0.672000         0.9710     429       263   1440
90     90 9.67e-01 -0.674000         0.9710     424       260   1420
91     91 9.67e-01 -0.676000         0.9700     419       256   1410
92     92 9.68e-01 -0.678000         0.9700     414       252   1400
93     93 9.69e-01 -0.680000         0.9710     409       248   1380
94     94 9.70e-01 -0.682000         0.9720     404       244   1370
95     95 9.69e-01 -0.685000         0.9700     399       241   1360
96     96 9.69e-01 -0.688000         0.9690     395       238   1350
97     97 9.68e-01 -0.689000         0.9690     390       235   1340
98     98 9.69e-01 -0.692000         0.9690     386       232   1320
99     99 9.68e-01 -0.694000         0.9670     381       228   1310
100   100 9.67e-01 -0.695000         0.9660     377       225   1300
In [73]:
options(repr.plot.width=24, repr.plot.height=6)
par(mfrow = c(1,2));
cex1 = 0.9;

plot(sft$fitIndices[, 1],
     -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
     xlab = "Soft Threshold (power)",
     ylab = "Scale Free Topology Model Fit, signed R^2",
     main = paste("Scale independence")
)
text(sft$fitIndices[, 1],
     -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
     labels = powers, cex = cex1, col = "red"
)
abline(h = 0.90, col = "red")
plot(sft$fitIndices[, 1],
     sft$fitIndices[, 5],
     xlab = "Soft Threshold (power)",
     ylab = "Mean Connectivity",
     type = "n",
     main = paste("Mean connectivity")
)
text(sft$fitIndices[, 1],
     sft$fitIndices[, 5],
     labels = powers,
     cex = cex1, col = "red")
In [74]:
picked_power = 30
temp_cor <- cor       
cor <- WGCNA::cor         # Force it to use WGCNA cor function (fix a namespace conflict issue)
netwk <- blockwiseModules(y.list,                # <= input here

                          # == Adjacency Function ==
                          power = picked_power,                # <= power here
                          networkType = "signed",

                          # == Tree and Block Options ==
                          deepSplit = 2,
                          pamRespectsDendro = F,
                          # detectCutHeight = 0.75,
                          minModuleSize = 200,
                          maxBlockSize = 4000,

                          # == Module Adjustments ==
                          reassignThreshold = 0,
                          mergeCutHeight = 0.25,

                          # == TOM == Archive the run results in TOM file (saves time)
                          saveTOMs = T,
                          saveTOMFileBase = "ER",

                          # == Output Options
                          numericLabels = T,
                          verbose = 3)
 Calculating module eigengenes block-wise from all genes
   Flagging genes and samples with too many missing values...
    ..step 1
 ....pre-clustering genes to determine blocks..
   Projective K-means:
   ..k-means clustering..
   ..merging smaller clusters...
Block sizes:
gBlocks
   1    2    3    4    5    6 
3874 3289 3234 2974 2659 1356 
 ..Working on block 1 .
    TOM calculation: adjacency..
    ..will use 94 parallel threads.
     Fraction of slow calculations: 0.000000
    ..connectivity..
    ..matrix multiplication (system BLAS)..
    ..normalization..
    ..done.
   ..saving TOM for block 1 into file ER-block.1.RData
 ....clustering..
 ....detecting modules..
 ....calculating module eigengenes..
 ....checking kME in modules..
     ..removing 1 genes from module 1 because their KME is too low.
     ..removing 10 genes from module 2 because their KME is too low.
 ..Working on block 2 .
    TOM calculation: adjacency..
    ..will use 94 parallel threads.
     Fraction of slow calculations: 0.000000
    ..connectivity..
    ..matrix multiplication (system BLAS)..
    ..normalization..
    ..done.
   ..saving TOM for block 2 into file ER-block.2.RData
 ....clustering..
 ....detecting modules..
 ....calculating module eigengenes..
 ....checking kME in modules..
 ..Working on block 3 .
    TOM calculation: adjacency..
    ..will use 94 parallel threads.
     Fraction of slow calculations: 0.000000
    ..connectivity..
    ..matrix multiplication (system BLAS)..
    ..normalization..
    ..done.
   ..saving TOM for block 3 into file ER-block.3.RData
 ....clustering..
 ....detecting modules..
 ....calculating module eigengenes..
 ....checking kME in modules..
 ..Working on block 4 .
    TOM calculation: adjacency..
    ..will use 94 parallel threads.
     Fraction of slow calculations: 0.000000
    ..connectivity..
    ..matrix multiplication (system BLAS)..
    ..normalization..
    ..done.
   ..saving TOM for block 4 into file ER-block.4.RData
 ....clustering..
 ....detecting modules..
 ....calculating module eigengenes..
 ....checking kME in modules..
 ..Working on block 5 .
    TOM calculation: adjacency..
    ..will use 94 parallel threads.
     Fraction of slow calculations: 0.000000
    ..connectivity..
    ..matrix multiplication (system BLAS)..
    ..normalization..
    ..done.
   ..saving TOM for block 5 into file ER-block.5.RData
 ....clustering..
 ....detecting modules..
 ....calculating module eigengenes..
 ....checking kME in modules..
 ..Working on block 6 .
    TOM calculation: adjacency..
    ..will use 94 parallel threads.
     Fraction of slow calculations: 0.000000
    ..connectivity..
    ..matrix multiplication (system BLAS)..
    ..normalization..
    ..done.
   ..saving TOM for block 6 into file ER-block.6.RData
 ....clustering..
 ....detecting modules..
 ....calculating module eigengenes..
 ....checking kME in modules..
     ..removing 2 genes from module 3 because their KME is too low.
 ..merging modules that are too close..
     mergeCloseModules: Merging modules whose distance is less than 0.25
       Calculating new MEs...
In [75]:
options(repr.plot.width=24, repr.plot.height=6)
# Convert labels to colors for plotting
mergedColors = labels2colors(netwk$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(
  netwk$dendrograms[[1]],
  mergedColors[netwk$blockGenes[[1]]],
  "Module colors",
  dendroLabels = FALSE,
  hang = 0.03,
  addGuide = TRUE,
  guideHang = 0.05 )
In [76]:
head(netwk$colors[netwk$blockGenes[[1]]])
AT1G01070
2
AT1G01100
1
AT1G01110
1
AT1G01210
1
AT1G01220
2
AT1G01225
2
In [77]:
table(netwk$colors)
   0    1    2    3    4    5    6    7    8    9   10 
  13 6758 6649  818  715  687  503  384  304  291  264 
In [95]:
module_df <- data.frame(
  gene_id = names(netwk$colors),
  colors = labels2colors(netwk$colors)
)
In [96]:
module_df[1:5,]
A data.frame: 5 x 2
gene_idcolors
<chr><chr>
1AT1G01010turquoise
2AT1G01020red
3AT1G01030turquoise
4AT1G01040turquoise
5AT1G01050blue
In [97]:
table(module_df$colors)
    black      blue     brown     green      grey   magenta      pink    purple 
      384      6649       818       687        13       291       304       264 
      red turquoise    yellow 
      503      6758       715 
In [98]:
options(repr.plot.width=8, repr.plot.height=8)
# Get Module Eigengenes per cluster
MEs0 <- moduleEigengenes(y.list, mergedColors)$eigengenes

# Reorder modules so similar modules are next to each other
MEs0 <- orderMEs(MEs0)
module_order = names(MEs0) %>% gsub("ME","", .)

# Add treatment names
MEs0$treatment = row.names(MEs0)

# tidy & plot data
mME = MEs0 %>%
  pivot_longer(-treatment) %>%
  mutate(
    name = gsub("ME", "", name),
    name = factor(name, levels = module_order)
  )

#mME <- mME[-which(mME$name == "grey"),] 
mME$treatment <- factor(mME$treatment, levels = seq(1,200))
mME$tricycle_position <- mME$treatment
#mME$name <- factor(mME$name, levels = rev(c("green", "grey", "black", "blue", "brown", "turquoise", "yellow", "red", "pink", "magenta")))
mME$name <- factor(mME$name, levels = rev(c("brown", "red", "black", "green", "turquoise", "yellow", "blue", "magenta", "pink","purple","grey")))
mME$module <- as.character(mME$name)
#mME$module[which(mME$module=="green")]="G1-M1"
#mME$module[which(mME$module=="grey")]="G1-M2"
#mME$module[which(mME$module=="blue")]="G1-M3"
#mME$module[which(mME$module=="black")]="G1-M4"
#mME$module[which(mME$module=="brown")]="G1S-M5"
#mME$module[which(mME$module=="turquoise")]="G1S-M6"
#mME$module[which(mME$module=="yellow")]="S-M7"
#mME$module[which(mME$module=="magenta")]="G2M-M8"
#mME$module[which(mME$module=="red")]="G2MG1-M9"
#mME$module[which(mME$module=="pink")]="G1S-M10"
#mME$module[which(mME$module=="grey")]="Unassigned"
#mME$module <- factor(mME$module, levels = rev(c("G1-M1", "G1-M2", "G1-M3", "G1-M4", "G1S-M5", "G1S-M6", "S-M7", "G2M-M8", "G2MG1-M9", "G1S-M10", "Unassigned")))

mME %>% ggplot(., aes(x=tricycle_position, y=name, fill=value)) +
  geom_tile() +
  theme_bw() +
  scale_fill_gradient2(
    low = "blue",
    high = "red",
    mid = "white",
    midpoint = 0,
  #  limit = c(-1,1)
  ) +
  theme(axis.text.x = element_text(angle=90)) +
  labs(title = "Module-trait Relationships", y = "Modules", fill="corr")

Module GO¶

In [99]:
## Prepare gene module list
module_sel <- select(module_df, gene_id, colors)
module_list <- split(module_sel, f=module_sel$colors)
#this makes list from long df of gene lists - TARGET is what we want to keep
module_list  <- lapply(module_list, function(x) x[names(x)=="gene_id"])
# convert each sublist into character and eliminate duplicates
module_list  <- lapply(module_list, function(x) as.character(unique(x$gene_id)))
In [100]:
cluster_GO <- gost(module_list, organism = "athaliana", correction_method = "fdr", significant = F, multi_query = F)

cluster_GO_df <- cluster_GO[[1]]

cluster_GO_sig <- filter(cluster_GO_df, p_value<=0.01)

# top  terms for each cluster

cluster_GO_sig %>%
  filter(source=="GO:BP", intersection_size>=4) %>%
  group_by(query) %>%
  top_n(5, wt = -p_value) %>%
  arrange(desc(p_value)) -> top_GO

GO_n <- cluster_GO_sig %>%
  filter(source=="GO:BP", intersection_size>=4) %>%
  group_by(term_id) %>%
  tally() %>%
  arrange(desc(n))


GO_n <- dplyr::rename(GO_n, "n_clusters"=n)

cluster_GO_sig_n <- left_join(cluster_GO_sig, GO_n)

# get all terms for the top ones so that all clusters have values

top_GO_all <- filter(cluster_GO_df, term_id %in% top_GO$term_id)


#spread and plot


top_GO_sel <- select(top_GO_all, query, p_value, term_id, term_name)

spread_GO <- spread(top_GO_sel, key = query, p_value)

spread_GO[is.na(spread_GO)] <- 1

spread_GO_m <- as.matrix(-log10(spread_GO[3:ncol(spread_GO)]))
rownames(spread_GO_m) <- spread_GO$term_name
Joining with `by = join_by(term_id)`
In [101]:
options(repr.plot.width = 16, repr.plot.height = 10)

GO_hm <- Heatmap(spread_GO_m, 
                 name = "-log10_pval", 
                 heatmap_legend_param = list(title_position="topcenter", color_bar = "continuous"), 
                 col = colorRamp2(c(0, 10), 
                                  c("beige", "#e31a1c")), 
                 cluster_rows = T,
                 cluster_columns = T, 
                 use_raster= FALSE, 
                 show_column_names = TRUE, 
                 show_row_names = TRUE, 
                 show_row_dend = TRUE, 
                 show_column_dend = TRUE, 
                 clustering_distance_rows = "pearson",
                 clustering_distance_columns = "pearson", 
                 row_names_gp = gpar(fontsize = 12)) 


# padding - bottom, left, top, right
draw(GO_hm, padding = unit(c(15, 15, 5, 80), "mm"), heatmap_legend_side = "left")

Remove grey¶

In [102]:
options(repr.plot.width=8, repr.plot.height=8)
# Get Module Eigengenes per cluster
MEs0 <- moduleEigengenes(y.list, mergedColors)$eigengenes

# Reorder modules so similar modules are next to each other
MEs0 <- orderMEs(MEs0)
module_order = names(MEs0) %>% gsub("ME","", .)

# Add treatment names
MEs0$treatment = row.names(MEs0)

# tidy & plot data
mME = MEs0 %>%
  pivot_longer(-treatment) %>%
  mutate(
    name = gsub("ME", "", name),
    name = factor(name, levels = module_order)
  )


mME <- mME[-which(mME$name == "grey"),] 
mME$treatment <- factor(mME$treatment, levels = seq(1,200))
mME$tricycle_position <- mME$treatment
#mME$name <- factor(mME$name, levels = rev(c("green", "grey", "black", "blue", "brown", "turquoise", "yellow", "red", "pink", "magenta")))
mME$name <- factor(mME$name, levels = rev(c("magenta", "blue", "black", "red", "green","purple", "turquoise", "pink", "yellow", "brown")))
mME$module <- as.character(mME$name)
mME$module[which(mME$module=="magenta")]="G1-M1"
mME$module[which(mME$module=="blue")]="G1-M2"
mME$module[which(mME$module=="black")]="G1S-M3"
mME$module[which(mME$module=="red")]="G1S-M4"
mME$module[which(mME$module=="green")]="G1S-M5"
mME$module[which(mME$module=="purple")]="G1S-M6"
mME$module[which(mME$module=="turquoise")]="S-M7"
mME$module[which(mME$module=="pink")]="S-M8"
mME$module[which(mME$module=="yellow")]="G2MG1-M9"
mME$module[which(mME$module=="brown")]="G2MG1-M10"
mME$module <- factor(mME$module, levels = rev(c("G1-M1", "G1-M2", "G1S-M3", "G1S-M4", "G1S-M5", "G1S-M6", "S-M7", "S-M8", "G2MG1-M9", "G2MG1-M10")))

mME %>% ggplot(., aes(x=tricycle_position, y=module, fill=value)) +
  geom_tile() +
  theme_bw() +
  scale_fill_gradient2(
    low = "blue",
    high = "red",
    mid = "white",
    midpoint = 0,
  #  limit = c(-1,1)
  ) +
  theme(axis.text.x = element_text(angle=90)) +
  labs(title = "Module-trait Relationships", y = "Modules", fill="corr")
In [103]:
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_Gene_Module_Plot_CC.pdf",width=8, height=8)
print(mME %>% ggplot(., aes(x=tricycle_position, y=module, fill=value)) +
  geom_tile() +
  theme_bw() +
  scale_fill_gradient2(
    low = "blue",
    high = "red",
    mid = "white",
    midpoint = 0,
  #  limit = c(-1,1)
  ) +
  theme(axis.text.x = element_text(angle=90)) +
  labs(title = "Module-trait Relationships", y = "Modules", fill="corr"))
dev.off()
png: 2
In [104]:
head(MEs0)
A data.frame: 6 x 12
MEgreenMEredMEblackMEblueMEpurpleMEturquoiseMEmagentaMEpinkMEbrownMEyellowMEgreytreatment
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><chr>
1-0.06292724-0.11280193-0.09476705-0.00709582590.03907418-0.036791460.0064695310.014712720.104651040.081817430.053034031
2-0.06319146-0.11042494-0.09141089-0.00387136760.03500705-0.038810490.0085948240.014576420.103788870.078973680.055303982
3-0.06343191-0.10791123-0.08787816-0.00059450440.03082476-0.040810870.0107974540.014480430.102827420.076039970.057416473
4-0.06364731-0.10526671-0.08417839 0.00272978180.02653601-0.042791180.0130714240.014422130.101769780.073021210.059371554
5-0.06383638-0.10249730-0.08032112 0.00609650920.02214945-0.044750020.0154107430.014398880.100619090.069922270.061169285
6-0.06399781-0.09960891-0.07631589 0.00950069590.01767377-0.046685980.0178094150.014408060.099378450.066748060.062809716
In [105]:
module_df$colors <- factor(module_df$colors, levels=c("magenta", "blue", "black", "red", "green","purple", "turquoise", "pink", "yellow", "brown"))
table(module_df$colors)
  magenta      blue     black       red     green    purple turquoise      pink 
      291      6649       384       503       687       264      6758       304 
   yellow     brown 
      715       818 
In [106]:
module_df$module <- as.character(module_df$colors)
module_df$module[which(module_df$module=="magenta")]="G1-M1"
module_df$module[which(module_df$module=="blue")]="G1-M2"
module_df$module[which(module_df$module=="black")]="G1S-M3"
module_df$module[which(module_df$module=="red")]="G1S-M4"
module_df$module[which(module_df$module=="green")]="G1S-M5"
module_df$module[which(module_df$module=="purple")]="G1S-M6"
module_df$module[which(module_df$module=="turquoise")]="S-M7"
module_df$module[which(module_df$module=="pink")]="S-M8"
module_df$module[which(module_df$module=="yellow")]="G2MG1-M9"
module_df$module[which(module_df$module=="brown")]="G2MG1-M10"

module_df$module <- factor(module_df$module, levels = c("G1-M1", "G1-M2", "G1S-M3", "G1S-M4", "G1S-M5", "G1S-M6", "S-M7", "S-M8", "G2MG1-M9", "G2MG1-M10"))

table(module_df$module)
    G1-M1     G1-M2    G1S-M3    G1S-M4    G1S-M5    G1S-M6      S-M7      S-M8 
      291      6649       384       503       687       264      6758       304 
 G2MG1-M9 G2MG1-M10 
      715       818 
In [107]:
module_df$BES_up <- rep("No",nrow(module_df))
module_df$BES_up[match(GL2up, module_df$gene_id)[!is.na(match(GL2up, module_df$gene_id))]]="Yes"
module_df$BES_down <- rep("No",nrow(module_df))
module_df$BES_down[match(GL2down, module_df$gene_id)[!is.na(match(GL2down, module_df$gene_id))]]="Yes"
module_df$MYB3R4 <- rep("No",nrow(module_df))
module_df$MYB3R4[match(GL3, module_df$gene_id)[!is.na(match(GL3, module_df$gene_id))]]="Yes"
In [108]:
## BES_up
round((table(module_df$module,module_df$BES_up)/as.numeric(table(module_df$module)))[,2]/sum((table(module_df$module,module_df$BES_up)/as.numeric(table(module_df$module)))[,2]),2)
G1-M1
0.05
G1-M2
0.19
G1S-M3
0.07
G1S-M4
0.1
G1S-M5
0.11
G1S-M6
0.06
S-M7
0.06
S-M8
0.11
G2MG1-M9
0.1
G2MG1-M10
0.17
In [109]:
table(module_df$module,module_df$BES_up)
           
              No  Yes
  G1-M1      286    5
  G1-M2     6230  419
  G1S-M3     375    9
  G1S-M4     486   17
  G1S-M5     661   26
  G1S-M6     259    5
  S-M7      6631  127
  S-M8       293   11
  G2MG1-M9   692   23
  G2MG1-M10  772   46
In [110]:
## BES_down
round((table(module_df$module,module_df$BES_down)/as.numeric(table(module_df$module)))[,2]/sum((table(module_df$module,module_df$BES_down)/as.numeric(table(module_df$module)))[,2]),2)
G1-M1
0.13
G1-M2
0.11
G1S-M3
0.1
G1S-M4
0.13
G1S-M5
0.12
G1S-M6
0.06
S-M7
0.07
S-M8
0.11
G2MG1-M9
0.09
G2MG1-M10
0.09
In [111]:
table(module_df$module,module_df$BES_down)
           
              No  Yes
  G1-M1      274   17
  G1-M2     6323  326
  G1S-M3     366   18
  G1S-M4     472   31
  G1S-M5     650   37
  G1S-M6     257    7
  S-M7      6527  231
  S-M8       289   15
  G2MG1-M9   685   30
  G2MG1-M10  785   33
In [112]:
## MYB3R4
round((table(module_df$module,module_df$MYB3R4)/as.numeric(table(module_df$module)))[,2]/sum((table(module_df$module,module_df$MYB3R4)/as.numeric(table(module_df$module)))[,2]),2)
G1-M1
0.05
G1-M2
0.04
G1S-M3
0
G1S-M4
0.01
G1S-M5
0
G1S-M6
0
S-M7
0.03
S-M8
0.02
G2MG1-M9
0.15
G2MG1-M10
0.69
In [113]:
table(module_df$module,module_df$MYB3R4)
           
              No  Yes
  G1-M1      289    2
  G1-M2     6616   33
  G1S-M3     384    0
  G1S-M4     502    1
  G1S-M5     687    0
  G1S-M6     264    0
  S-M7      6734   24
  S-M8       303    1
  G2MG1-M9   700   15
  G2MG1-M10  741   77
In [114]:
write.csv(module_df, "./tradeseq/All_genes_Phloem_Atlas_tricycle_module.csv")
In [115]:
# pick out a few modules of interest here
modules_of_interest = c("G1-M1", "G1-M2", "G1S-M3", "G1S-M4", "G1S-M5", "G1S-M6", "S-M7", "S-M8", "G2MG1-M9", "G2MG1-M10")

# Pull out list of genes in that module
submod = module_df %>%
  subset(module %in% modules_of_interest)

row.names(module_df) = module_df$gene_id

# Get normalized expression for those genes
expr_normalized <- t(apply(y.list,2, function(x){(x-min(x))/(max(x)-min(x))}))

subexpr = expr_normalized[submod$gene_id,]

submod_df = data.frame(subexpr) %>%
  mutate(
    gene_id = row.names(.)
  ) %>%
  pivot_longer(-gene_id) %>%
  mutate(
    module = module_df[gene_id,]$module
  )

submod_df$name <- factor(submod_df$name, levels = paste0("X",seq(1,200)))
submod_df$module <- factor(submod_df$module, levels =c("G1-M1", "G1-M2", "G1S-M3", "G1S-M4", "G1S-M5", "G1S-M6", "S-M7", "S-M8", "G2MG1-M9", "G2MG1-M10"))

submod_df %>% ggplot(., aes(x=name, y=value, group=gene_id)) +
  geom_line(aes(color = module),
            alpha = 0.2) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90)
  ) +
  facet_grid(rows = vars(module)) +
  labs(x = "tricycle position",
       y = "scaled expression")
In [116]:
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_Gene_Module_Exp_Plot_CC.pdf",width=8, height=8)
print(submod_df %>% ggplot(., aes(x=name, y=value, group=gene_id)) +
  geom_line(aes(color = module),
            alpha = 0.2) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90)
  ) +
  facet_grid(rows = vars(module)) +
  labs(x = "tricycle position",
       y = "scaled expression"))
dev.off()
png: 2

Module Membership¶

In [125]:
ME <- moduleEigengenes(y.list, mergedColors)$eigengenes
In [126]:
head(ME)
A data.frame: 6 x 11
MEblackMEblueMEbrownMEgreenMEgreyMEmagentaMEpinkMEpurpleMEredMEturquoiseMEyellow
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
1-0.09476705-0.00709582590.10465104-0.062927240.053034030.0064695310.014712720.03907418-0.11280193-0.036791460.08181743
2-0.09141089-0.00387136760.10378887-0.063191460.055303980.0085948240.014576420.03500705-0.11042494-0.038810490.07897368
3-0.08787816-0.00059450440.10282742-0.063431910.057416470.0107974540.014480430.03082476-0.10791123-0.040810870.07603997
4-0.08417839 0.00272978180.10176978-0.063647310.059371550.0130714240.014422130.02653601-0.10526671-0.042791180.07302121
5-0.08032112 0.00609650920.10061909-0.063836380.061169280.0154107430.014398880.02214945-0.10249730-0.044750020.06992227
6-0.07631589 0.00950069590.09937845-0.063997810.062809710.0178094150.014408060.01767377-0.09960891-0.046685980.06674806
In [127]:
ME <- ME[,c("MEmagenta", "MEblue", "MEblack", "MEred","MEgreen", "MEpurple", "MEturquoise", "MEpink", "MEyellow", "MEbrown")]
In [128]:
colnames(ME)[which(colnames(ME)=="magenta")]="G1-M1"
colnames(ME)[which(colnames(ME)=="blue")]="G1-M2"
colnames(ME)[which(colnames(ME)=="black")]="G1S-M3"
colnames(ME)[which(colnames(ME)=="red")]="G1S-M4"
colnames(ME)[which(colnames(ME)=="green")]="G1S-M5"
colnames(ME)[which(colnames(ME)=="purple")]="G1S-M6"
colnames(ME)[which(colnames(ME)=="turquoise")]="S-M7"
colnames(ME)[which(colnames(ME)=="pink")]="S-M8"
colnames(ME)[which(colnames(ME)=="yellow")]="G2MG1-M9"
colnames(ME)[which(colnames(ME)=="brown")]="G2MG1-M10"
In [129]:
head(ME)
A data.frame: 6 x 10
MEmagentaMEblueMEblackMEredMEgreenMEpurpleMEturquoiseMEpinkMEyellowMEbrown
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
10.006469531-0.0070958259-0.09476705-0.11280193-0.062927240.03907418-0.036791460.014712720.081817430.10465104
20.008594824-0.0038713676-0.09141089-0.11042494-0.063191460.03500705-0.038810490.014576420.078973680.10378887
30.010797454-0.0005945044-0.08787816-0.10791123-0.063431910.03082476-0.040810870.014480430.076039970.10282742
40.013071424 0.0027297818-0.08417839-0.10526671-0.063647310.02653601-0.042791180.014422130.073021210.10176978
50.015410743 0.0060965092-0.08032112-0.10249730-0.063836380.02214945-0.044750020.014398880.069922270.10061909
60.017809415 0.0095006959-0.07631589-0.09960891-0.063997810.01767377-0.046685980.014408060.066748060.09937845
In [130]:
datKME = signedKME(y.list[,module_df$gene_id[which(module_df$module != "Unassigned")]], ME, outputColumnName = "kME")
In [131]:
colnames(datKME) <- paste0("kME_",c("G1-M1", "G1-M2", "G1S-M3", "G1S-M4", "G1S-M5", "G1S-M6", "S-M7", "S-M8", "G2MG1-M9", "G2MG1-M10"))
In [132]:
head(datKME)
A data.frame: 6 x 10
kME_G1-M1kME_G1-M2kME_G1S-M3kME_G1S-M4kME_G1S-M5kME_G1S-M6kME_S-M7kME_S-M8kME_G2MG1-M9kME_G2MG1-M10
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
AT1G01010-0.47635424-0.82896438 0.12114626 0.36334674 0.3858923 0.54098776 0.9592199 0.09904726 0.0367631-0.7066345
AT1G01020-0.46743291 0.03862948 0.45900966 0.96528162 0.8442728-0.08192871 0.2245726-0.54946464-0.8431030-0.9562942
AT1G01030-0.00883007-0.84503027-0.37749861-0.63440726-0.4577792 0.63722195 0.6936058 0.63137073 0.8968953 0.2902137
AT1G01040-0.13993421-0.19381788 0.71774431 0.85715816 0.4612264-0.16769257 0.5411004-0.02071880-0.5418415-0.9031343
AT1G01050 0.38247055 0.95869905 0.07054638-0.02776636-0.1258440-0.66810288-0.9936319-0.30285984-0.3706164 0.4249815
AT1G01060-0.37395407-0.66413160 0.34985813 0.55427039 0.4150461 0.31508079 0.8824949 0.08371773-0.1560908-0.8163872
In [133]:
write.csv(left_join(rownames_to_column(datKME), rownames_to_column(module_df), by=c("rowname")), "./tradeseq/All_genes_Phloem_Atlas_tricycle_module_membership.csv")

Export Networks (Did not run, just for ref)¶

In [140]:
genes_of_interest = module_df %>%
  subset(module %in% modules_of_interest[-11])

expr_of_interest = t(y.list)[genes_of_interest$gene_id,]
expr_of_interest[1:5,1:5]
#>                       B-3      B-4      B-5      L-3      L-4
#> AC186512.3_FG001 6.901539 7.389644 6.975945 6.859593 7.370816
#> AC186512.3_FG007 7.919688 7.754506 7.670946 7.417760 7.988427
#> AC190623.3_FG001 6.575155 7.170788 7.438024 8.223261 8.008850
#> AC196475.3_FG004 6.054319 6.439899 6.424540 5.815344 6.565299
#> AC196475.3_FG005 6.194406 5.872273 6.207174 6.499828 6.314952

# Only recalculate TOM for modules of interest (faster, altho there's some online discussion if this will be slightly off)
TOM = TOMsimilarityFromExpr(t(expr_of_interest),
                            power = picked_power)
#> TOM calculation: adjacency..
#> ..will use 4 parallel threads.
#>  Fraction of slow calculations: 0.000000
#> ..connectivity..
#> ..matrix multiplication (system BLAS)..
#> ..normalization..
#> ..done.

# Add gene names to row and columns
row.names(TOM) = row.names(expr_of_interest)
colnames(TOM) = row.names(expr_of_interest)
A matrix: 5 x 5 of type dbl
AT1G010100.034374180.034451250.034528330.034605260.03468185
AT1G010200.161250470.159360690.157461790.155556530.15364767
AT1G010300.011639360.011414130.011185630.010954250.01072038
AT1G010400.028171550.027626310.027073170.026513150.02594726
AT1G010501.138159371.142304331.146529461.150827371.15519066
TOM calculation: adjacency..
..will use 94 parallel threads.
 Fraction of slow calculations: 0.000000
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
In [141]:
edge_list = data.frame(TOM) %>%
  mutate(
    gene1 = row.names(.)
  ) %>%
  pivot_longer(-gene1) %>%
  dplyr::rename(gene2 = name, correlation = value) %>%
  unique() %>%
  subset(!(gene1==gene2)) %>%
  mutate(
    module1 = module_df[gene1,]$module,
    module2 = module_df[gene2,]$module
  )

head(edge_list)
#> # A tibble: 6 x 5
#>   gene1            gene2            correlation module1   module2  
#>   <chr>            <chr>                  <dbl> <chr>     <chr>    
#> 1 AC186512.3_FG001 AC186512.3_FG007      0.0238 turquoise turquoise
#> 2 AC186512.3_FG001 AC190623.3_FG001      0.0719 turquoise turquoise
#> 3 AC186512.3_FG001 AC196475.3_FG004      0.143  turquoise turquoise
#> 4 AC186512.3_FG001 AC196475.3_FG005      0.0117 turquoise turquoise
#> 5 AC186512.3_FG001 AC196489.3_FG002      0.0181 turquoise turquoise
#> 6 AC186512.3_FG001 AC198481.3_FG004      0.0240 turquoise turquoise

# Export Network file to be read into Cytoscape, VisANT, etc
write_delim(edge_list,
            file = "./tradeseq/Module_network_edgelist.tsv",
            delim = "\t")
A tibble: 6 x 5
gene1gene2correlationmodule1module2
<chr><chr><dbl><fct><fct>
AT1G01010AT1G010201.263899e-05G2MG1-M9S-M7
AT1G01010AT1G010301.360577e-05G2MG1-M9S-M7
AT1G01010AT1G010401.104410e-06G2MG1-M9S-M7
AT1G01010AT1G010506.631369e-05G2MG1-M9G1-M2
AT1G01010AT1G010607.753520e-04G2MG1-M9S-M7
AT1G01010AT1G010703.555517e-08G2MG1-M9S-M7
In [142]:
## 30G too large
nrow(edge_list)
589639806
In [143]:
summary(edge_list$correlation)
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
0.0000000 0.0000138 0.0056340 0.0982158 0.1385098 0.6644113 
In [157]:
edge_list_cor04 <- edge_list %>% filter(correlation>=0.4)
In [158]:
## 2.6G acceptable
nrow(edge_list_cor04)
52641560
In [159]:
write_delim(edge_list_cor04,
            file = "./tradeseq/Module_network_edgelist_cor04.tsv",
            delim = "\t")
In [160]:
head(edge_list_cor04)
A tibble: 6 x 5
gene1gene2correlationmodule1module2
<chr><chr><dbl><fct><fct>
AT1G01020AT1G010900.4742238S-M7S-M7
AT1G01020AT1G011300.4721234S-M7S-M7
AT1G01020AT1G012600.4529889S-M7S-M7
AT1G01020AT1G012900.5408273S-M7S-M7
AT1G01020AT1G013700.4995471S-M7S-M7
AT1G01020AT1G014800.4947831S-M7S-M7
In [161]:
table(edge_list_cor04$module1)
     G1-M1      G1-M2      G1-M3      G1-M4     G1S-M5     G1S-M6       S-M7 
     60171    8334861     179501       4033    1002022      49934   40704082 
    G2M-M8   G2MG1-M9    G1S-M10 Unassigned 
   2295271      11613         72          0 
In [162]:
table(edge_list_cor04$module2)
     G1-M1      G1-M2      G1-M3      G1-M4     G1S-M5     G1S-M6       S-M7 
     60171    8334861     179501       4033    1002022      49934   40704082 
    G2M-M8   G2MG1-M9    G1S-M10 Unassigned 
   2295271      11613         72          0 

GeneOverlap¶

Modules vs. signal interested¶

In [26]:
module_df <- read.csv("./tradeseq/All_genes_Phloem_Atlas_tricycle_module.csv", row.names=1)
In [27]:
head(module_df)
A data.frame: 6 x 6
gene_idcolorsmoduleBES_upBES_downMYB3R4
<chr><chr><chr><chr><chr><chr>
1AT1G01010turquoiseS-M7 NoNoNo
2AT1G01020red G1S-M4NoNoNo
3AT1G01030turquoiseS-M7 NoNoNo
4AT1G01040turquoiseS-M7 NoNoNo
5AT1G01050blue G1-M2 NoNoNo
6AT1G01060turquoiseS-M7 NoNoNo
In [28]:
## Prepare gene module list
module_sel <- select(module_df, gene_id, module)
module_list <- split(module_sel, f=module_sel$module)
#this makes list from long df of gene lists - TARGET is what we want to keep
module_list  <- lapply(module_list, function(x) x[names(x)=="gene_id"])
# convert each sublist into character and eliminate duplicates
module_list  <- lapply(module_list, function(x) as.character(unique(x$gene_id)))
In [29]:
module_list <- module_list[c("G1-M1", "G1-M2", "G1S-M3", "G1S-M4", "G1S-M5", "G1S-M6", "S-M7", "S-M8", "G2MG1-M9", "G2MG1-M10")]
In [30]:
## Prepare gene lists to intersect with module
BES_up <- module_df$gene_id[which(module_df$BES_up=="Yes")]
BES_down <- module_df$gene_id[which(module_df$BES_down=="Yes")]
MYB3R4 <- module_df$gene_id[which(module_df$MYB3R4=="Yes")]
g_list <- list("BES_up"=BES_up, "BES_down"=BES_down, "MYB3R4"=MYB3R4)
In [31]:
nrow(module_df)
17386
In [32]:
## GeneOverlap 

# number of integrated features

genome_size <- 17386L

#compare all lists
gom.self <- newGOM(module_list, g_list, genome.size=genome_size)
In [33]:
options(repr.plot.width = 10, repr.plot.height = 10)
 
int <- getNestedList(gom.self, "intersection")
int_matrix <- getMatrix(gom.self, "intersection")
p.val <- getMatrix(gom.self, "pval")
JC <- getMatrix(gom.self, "Jaccard")

# log of p.val for intersection
p.val_log <- -log10(p.val + 1e-200)


olap <- Heatmap(p.val_log, 
                name = "-log10_pval", 
                col = colorRamp2(c(0, 10), 
                                 c("beige", "red")), 
                column_title = "Number of shared genes", 
                cluster_rows = F,
                cluster_columns = F, 
                use_raster= FALSE, 
                show_column_names = TRUE, 
                show_row_names = TRUE, 
                show_row_dend = TRUE,
                heatmap_legend_param = list(title_gp = grid::gpar(fontsize = 12)),
                column_title_gp = grid::gpar(fontsize = 18),
                column_names_gp = grid::gpar(fontsize = 18),
                row_names_gp = grid::gpar(fontsize = 18),
                clustering_distance_rows = "pearson",
                clustering_distance_columns = "pearson", 
                show_column_dend = TRUE, cell_fun = function(j, i, x, y, width, height, fill) {grid.text(sprintf("%.0f", int_matrix[i, j]), x, y, gp = gpar(fontsize = 18))
})
                        
                        # padding - bottom, left, top, right
draw(olap, padding = unit(c(15, 5, 5, 10), "mm"), heatmap_legend_side = "left")
In [34]:
summary(as.numeric(as.matrix(p.val_log)))
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
 0.00000  0.00416  0.12161  3.57974  0.80739 60.30735 
In [144]:
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_Gene_Module_Intersection_With_GeneList.pdf",width=10, height=10)
draw(olap, padding = unit(c(15, 5, 5, 10), "mm"), heatmap_legend_side = "left")
dev.off()
png: 2

Check signals (z-score) in the proliferation domain Protophloem¶

In [35]:
bsn <- read.csv("./tradeseq/BR_bsn_genes.csv")
In [36]:
head(bsn)
A data.frame: 6 x 2
NameAGI
<chr><chr>
1DWF4 AT3G50660
2CPD AT5G05690
3DET2 AT2G38050
4ROT3 AT4G36380
5CYP90D1 AT3G13730
6BR6OX1 AT5G38970
In [37]:
length(module_df$gene_id)
17386
In [38]:
s1 <- intersect(module_list$`G1-M2`, g_list$BES_up)
#s1 <- g_list$BES_up
length(s1)
#s2 <- intersect(module_list$`G1-M2`, g_list$BES_down)
#s2 <- g_list$BES_down
s2 <- intersect(module_df$gene_id, bsn$AGI)
length(s2)
419
3
In [39]:
s1
  1. 'AT1G01540'
  2. 'AT1G02640'
  3. 'AT1G03870'
  4. 'AT1G04680'
  5. 'AT1G05000'
  6. 'AT1G05530'
  7. 'AT1G05805'
  8. 'AT1G05870'
  9. 'AT1G06850'
  10. 'AT1G07240'
  11. 'AT1G07410'
  12. 'AT1G08500'
  13. 'AT1G08510'
  14. 'AT1G08590'
  15. 'AT1G09950'
  16. 'AT1G10480'
  17. 'AT1G11740'
  18. 'AT1G12240'
  19. 'AT1G12500'
  20. 'AT1G12710'
  21. 'AT1G12850'
  22. 'AT1G13250'
  23. 'AT1G14290'
  24. 'AT1G14685'
  25. 'AT1G15210'
  26. 'AT1G15800'
  27. 'AT1G16510'
  28. 'AT1G18290'
  29. 'AT1G19220'
  30. 'AT1G19835'
  31. 'AT1G19840'
  32. 'AT1G19870'
  33. 'AT1G20010'
  34. 'AT1G20190'
  35. 'AT1G20823'
  36. 'AT1G20840'
  37. 'AT1G21070'
  38. 'AT1G21830'
  39. 'AT1G21900'
  40. 'AT1G22180'
  41. 'AT1G22330'
  42. 'AT1G22460'
  43. 'AT1G22530'
  44. 'AT1G22640'
  45. 'AT1G22910'
  46. 'AT1G23030'
  47. 'AT1G24170'
  48. 'AT1G25230'
  49. 'AT1G25400'
  50. 'AT1G25425'
  51. 'AT1G27200'
  52. 'AT1G27930'
  53. 'AT1G28280'
  54. 'AT1G29060'
  55. 'AT1G29280'
  56. 'AT1G29640'
  57. 'AT1G32090'
  58. 'AT1G32170'
  59. 'AT1G32700'
  60. 'AT1G33230'
  61. 'AT1G33240'
  62. 'AT1G36060'
  63. 'AT1G49500'
  64. 'AT1G50430'
  65. 'AT1G51950'
  66. 'AT1G52750'
  67. 'AT1G55360'
  68. 'AT1G56590'
  69. 'AT1G61100'
  70. 'AT1G61740'
  71. 'AT1G63000'
  72. 'AT1G63430'
  73. 'AT1G64650'
  74. 'AT1G64700'
  75. 'AT1G65295'
  76. 'AT1G65840'
  77. 'AT1G66150'
  78. 'AT1G68490'
  79. 'AT1G68670'
  80. 'AT1G68840'
  81. 'AT1G69030'
  82. 'AT1G69690'
  83. 'AT1G69780'
  84. 'AT1G70370'
  85. 'AT1G70900'
  86. 'AT1G70940'
  87. 'AT1G70950'
  88. 'AT1G71880'
  89. 'AT1G71970'
  90. 'AT1G72150'
  91. 'AT1G72160'
  92. 'AT1G72300'
  93. 'AT1G72430'
  94. 'AT1G72510'
  95. 'AT1G72690'
  96. 'AT1G76160'
  97. 'AT1G76180'
  98. 'AT1G76230'
  99. 'AT1G76600'
  100. 'AT1G76878'
  101. 'AT1G77640'
  102. 'AT1G77850'
  103. 'AT1G78260'
  104. 'AT1G78300'
  105. 'AT1G78860'
  106. 'AT2G01300'
  107. 'AT2G01940'
  108. 'AT2G02180'
  109. 'AT2G02810'
  110. 'AT2G04500'
  111. 'AT2G12462'
  112. 'AT2G16660'
  113. 'AT2G17290'
  114. 'AT2G17300'
  115. 'AT2G18160'
  116. 'AT2G18960'
  117. 'AT2G19580'
  118. 'AT2G19660'
  119. 'AT2G19800'
  120. 'AT2G19880'
  121. 'AT2G20562'
  122. 'AT2G20670'
  123. 'AT2G20780'
  124. 'AT2G20840'
  125. 'AT2G21520'
  126. 'AT2G21880'
  127. 'AT2G22125'
  128. 'AT2G24300'
  129. 'AT2G24765'
  130. 'AT2G25430'
  131. 'AT2G25490'
  132. 'AT2G26730'
  133. 'AT2G27770'
  134. 'AT2G28200'
  135. 'AT2G30230'
  136. 'AT2G30930'
  137. 'AT2G30990'
  138. 'AT2G31010'
  139. 'AT2G32140'
  140. 'AT2G32430'
  141. 'AT2G32580'
  142. 'AT2G34770'
  143. 'AT2G35880'
  144. 'AT2G36310'
  145. 'AT2G36320'
  146. 'AT2G36410'
  147. 'AT2G36830'
  148. 'AT2G37678'
  149. 'AT2G38120'
  150. 'AT2G38310'
  151. 'AT2G38480'
  152. 'AT2G39130'
  153. 'AT2G41110'
  154. 'AT2G41640'
  155. 'AT2G41810'
  156. 'AT2G42320'
  157. 'AT2G42380'
  158. 'AT2G42570'
  159. 'AT2G42580'
  160. 'AT2G42870'
  161. 'AT2G42880'
  162. 'AT2G43340'
  163. 'AT2G44740'
  164. 'AT2G44940'
  165. 'AT2G45070'
  166. 'AT2G45180'
  167. 'AT2G45200'
  168. 'AT2G45670'
  169. 'AT2G46710'
  170. 'AT2G47070'
  171. 'AT2G47440'
  172. 'AT2G47485'
  173. 'AT2G48030'
  174. 'AT3G02170'
  175. 'AT3G02460'
  176. 'AT3G02570'
  177. 'AT3G02580'
  178. 'AT3G02750'
  179. 'AT3G04730'
  180. 'AT3G04910'
  181. 'AT3G05165'
  182. 'AT3G05800'
  183. 'AT3G05890'
  184. 'AT3G05910'
  185. 'AT3G05936'
  186. 'AT3G06060'
  187. 'AT3G06070'
  188. 'AT3G06750'
  189. 'AT3G06890'
  190. 'AT3G07010'
  191. 'AT3G07460'
  192. 'AT3G08670'
  193. 'AT3G08680'
  194. 'AT3G10980'
  195. 'AT3G11130'
  196. 'AT3G11150'
  197. 'AT3G11590'
  198. 'AT3G11700'
  199. 'AT3G12560'
  200. 'AT3G12610'
  201. ...
  202. 'AT3G19030'
  203. 'AT3G19680'
  204. 'AT3G20640'
  205. 'AT3G21700'
  206. 'AT3G23690'
  207. 'AT3G23750'
  208. 'AT3G23820'
  209. 'AT3G24550'
  210. 'AT3G25700'
  211. 'AT3G25730'
  212. 'AT3G28180'
  213. 'AT3G28910'
  214. 'AT3G29370'
  215. 'AT3G43430'
  216. 'AT3G43800'
  217. 'AT3G44260'
  218. 'AT3G46280'
  219. 'AT3G47210'
  220. 'AT3G47220'
  221. 'AT3G47820'
  222. 'AT3G48360'
  223. 'AT3G49350'
  224. 'AT3G49360'
  225. 'AT3G49630'
  226. 'AT3G49940'
  227. 'AT3G50350'
  228. 'AT3G50530'
  229. 'AT3G52290'
  230. 'AT3G52460'
  231. 'AT3G52470'
  232. 'AT3G52500'
  233. 'AT3G53370'
  234. 'AT3G53600'
  235. 'AT3G54000'
  236. 'AT3G54030'
  237. 'AT3G54400'
  238. 'AT3G54770'
  239. 'AT3G54810'
  240. 'AT3G55960'
  241. 'AT3G56000'
  242. 'AT3G56800'
  243. 'AT3G56810'
  244. 'AT3G57420'
  245. 'AT3G58120'
  246. 'AT3G58130'
  247. 'AT3G58620'
  248. 'AT3G58850'
  249. 'AT3G60320'
  250. 'AT3G60540'
  251. 'AT3G61820'
  252. 'AT3G62150'
  253. 'AT3G62390'
  254. 'AT3G62570'
  255. 'AT4G00710'
  256. 'AT4G00740'
  257. 'AT4G02130'
  258. 'AT4G02540'
  259. 'AT4G03190'
  260. 'AT4G04745'
  261. 'AT4G09890'
  262. 'AT4G10380'
  263. 'AT4G11280'
  264. 'AT4G12420'
  265. 'AT4G12730'
  266. 'AT4G13340'
  267. 'AT4G14750'
  268. 'AT4G14980'
  269. 'AT4G15800'
  270. 'AT4G15920'
  271. 'AT4G16380'
  272. 'AT4G16660'
  273. 'AT4G17870'
  274. 'AT4G17900'
  275. 'AT4G18380'
  276. 'AT4G18670'
  277. 'AT4G19120'
  278. 'AT4G19160'
  279. 'AT4G20170'
  280. 'AT4G20300'
  281. 'AT4G20870'
  282. 'AT4G21960'
  283. 'AT4G22820'
  284. 'AT4G24230'
  285. 'AT4G25280'
  286. 'AT4G25360'
  287. 'AT4G26690'
  288. 'AT4G27270'
  289. 'AT4G28720'
  290. 'AT4G28730'
  291. 'AT4G29780'
  292. 'AT4G30190'
  293. 'AT4G30400'
  294. 'AT4G30410'
  295. 'AT4G30440'
  296. 'AT4G30500'
  297. 'AT4G31910'
  298. 'AT4G32150'
  299. 'AT4G32480'
  300. 'AT4G33440'
  301. 'AT4G34800'
  302. 'AT4G34810'
  303. 'AT4G35320'
  304. 'AT4G35335'
  305. 'AT4G35500'
  306. 'AT4G36040'
  307. 'AT4G36500'
  308. 'AT4G36880'
  309. 'AT4G37080'
  310. 'AT4G37235'
  311. 'AT4G37470'
  312. 'AT4G39390'
  313. 'AT5G01800'
  314. 'AT5G01810'
  315. 'AT5G02580'
  316. 'AT5G03040'
  317. 'AT5G03520'
  318. 'AT5G03720'
  319. 'AT5G04470'
  320. 'AT5G04480'
  321. 'AT5G05100'
  322. 'AT5G05440'
  323. 'AT5G06390'
  324. 'AT5G06700'
  325. 'AT5G06930'
  326. 'AT5G07440'
  327. 'AT5G08335'
  328. 'AT5G09870'
  329. 'AT5G10720'
  330. 'AT5G11000'
  331. 'AT5G11640'
  332. 'AT5G11970'
  333. 'AT5G12940'
  334. 'AT5G13180'
  335. 'AT5G13820'
  336. 'AT5G14110'
  337. 'AT5G14930'
  338. 'AT5G15150'
  339. 'AT5G15330'
  340. 'AT5G15350'
  341. 'AT5G15580'
  342. 'AT5G15600'
  343. 'AT5G16110'
  344. 'AT5G16190'
  345. 'AT5G16360'
  346. 'AT5G17980'
  347. 'AT5G18670'
  348. 'AT5G18860'
  349. 'AT5G19190'
  350. 'AT5G19200'
  351. 'AT5G19340'
  352. 'AT5G20050'
  353. 'AT5G20150'
  354. 'AT5G20250'
  355. 'AT5G21280'
  356. 'AT5G22090'
  357. 'AT5G22740'
  358. 'AT5G22950'
  359. 'AT5G23280'
  360. 'AT5G23360'
  361. 'AT5G23860'
  362. 'AT5G23870'
  363. 'AT5G23940'
  364. 'AT5G25100'
  365. 'AT5G28770'
  366. 'AT5G35525'
  367. 'AT5G40450'
  368. 'AT5G40890'
  369. 'AT5G41080'
  370. 'AT5G44020'
  371. 'AT5G44130'
  372. 'AT5G46910'
  373. 'AT5G48560'
  374. 'AT5G49360'
  375. 'AT5G49710'
  376. 'AT5G52280'
  377. 'AT5G52882'
  378. 'AT5G53880'
  379. 'AT5G54145'
  380. 'AT5G54380'
  381. 'AT5G54750'
  382. 'AT5G54850'
  383. 'AT5G55960'
  384. 'AT5G56040'
  385. 'AT5G57740'
  386. 'AT5G58530'
  387. 'AT5G58560'
  388. 'AT5G58650'
  389. 'AT5G60300'
  390. 'AT5G61570'
  391. 'AT5G61590'
  392. 'AT5G63650'
  393. 'AT5G64740'
  394. 'AT5G64850'
  395. 'AT5G65270'
  396. 'AT5G65390'
  397. 'AT5G65660'
  398. 'AT5G66680'
  399. 'AT5G67390'
  400. 'AT5G67470'
  401. 'AT5G67480'
In [40]:
s2
  1. 'AT3G13730'
  2. 'AT3G50660'
  3. 'AT5G05690'
In [41]:
save(s1,s2,file='./tradeseq/Phloem_Atlas_upper_lower_cells_gene_lists.rds')
In [42]:
ulgl <- list("Upper_cells"=s1, "Lower_cells"=s2)
# Find the maximum length
max_length <- max(lengths(ulgl))

# Fill with NA to make all equal length
ulgl_df <- data.frame(lapply(ulgl, function(x) c(x, rep(NA, max_length - length(x)))))
In [43]:
write.csv(ulgl_df, "./tradeseq/Phloem_Atlas_upper_lower_cells_gene_lists.csv")
In [44]:
rc.sub <- subset(rc.integrated, cells= colnames(rc.integrated)[which(rc.integrated$tricycleCCStage == "G1/G0")])
#rc.sub <- subset(rc.integrated, cells= colnames(rc.integrated)[which(rc.integrated$tricycleCCStage == "G1/G0" | rc.integrated$tricycleCCStage == "S")])
In [45]:
## 1177 G1 cells out of 410 cells
length(colnames(rc.integrated)[which(rc.integrated$tricycleCCStage == "G1/G0")])
## 1339 S cells
length(colnames(rc.integrated)[which(rc.integrated$tricycleCCStage == "S")])
## 1129 G2M cells
length(colnames(rc.integrated)[which(rc.integrated$tricycleCCStage == "G2M")])
1177
1339
1129
In [46]:
zscore <- function(x){(x-mean(x))/sd(x)}
s1z <- as.numeric(apply(apply(as.matrix(rc.sub@assays$RNA@data)[s1,], 1, zscore), 1, function(x){mean(x,na.rm = TRUE)}))
s2z <- as.numeric(apply(apply(as.matrix(rc.sub@assays$RNA@data)[s2,], 1, zscore), 1, function(x){mean(x,na.rm = TRUE)}))
In [47]:
summary(s1z)
summary(s2z)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-0.40912 -0.25354 -0.04296  0.00000  0.16660  0.96865 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-0.3550 -0.3550 -0.2367  0.0000  0.1256  9.0612 
In [48]:
dat <- data.frame(cell_id=colnames(rc.sub), tricyclePosition=rc.sub$tricyclePosition, Signature1=s1z, Signature2=s2z)
dat <- dat %>% mutate(diff = abs(Signature1-Signature2))
In [49]:
range01 <- function(x){(x-min(x))/(max(x)-min(x))}
In [50]:
dat$Signature1_scaled <- range01(dat$Signature1)
dat$Signature2_scaled <- range01(dat$Signature2)
dat$ratio <- dat$Signature1_scaled/dat$Signature2_scaled
In [51]:
head(dat)
A data.frame: 6 x 8
cell_idtricyclePositionSignature1Signature2diffSignature1_scaledSignature2_scaledratio
<chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
APL_AAACCCACAGTTGAAA-1APL_AAACCCACAGTTGAAA-10.5938682 0.36621714-0.355034420.7212515520.56274650.00000000 Inf
APL_AAACCCATCTGCGAGC-1APL_AAACCCATCTGCGAGC-10.6420312 0.28601250 0.290786810.0047743060.50453310.068586137.356197
APL_AAAGAACTCGAGAAGC-1APL_AAAGAACTCGAGAAGC-10.6490424 0.13330378-0.355034420.4883381940.39369550.00000000 Inf
APL_AAAGGTAGTCTGTGAT-1APL_AAAGGTAGTCTGTGAT-10.7424053 0.11557090 1.179504941.0639340390.38082480.162967872.336809
APL_AAAGGTATCCTGTAGA-1APL_AAAGGTATCCTGTAGA-10.6932624-0.07080545-0.078563770.0077583170.24555090.029361158.363123
APL_AAAGTGAGTTTCCATT-1APL_AAAGTGAGTTTCCATT-10.5952930-0.08766026-0.355034420.2673741600.23331750.00000000 Inf
In [52]:
dat$inianno <- rep("Unassigned", length(rc.integrated))
dat$anno <- rep("Unassigned", length(rc.integrated))
#dat$anno[which((dat$Signature1 >0) & (dat$Signature1 > dat$Signature2))] = "S1"
#dat$anno[which((dat$Signature2 >0) & (dat$Signature2 > dat$Signature1))] = "S2"
dat$inianno[which((dat$Signature1 > dat$Signature2))] = "S1"
dat$inianno[which((dat$Signature2 > dat$Signature1))] = "S2"
dat$anno[(dat$inianno == "S1")][(order(dat$Signature1[which(dat$inianno == "S1")]) < nrow(dat)/10)] = "S1"
dat$anno[(dat$inianno == "S2")][(order(dat$Signature2[which(dat$inianno == "S2")]) < nrow(dat)/10)] = "S2"
#dat$anno[which((dat$Signature1 >0) & (dat$Signature1 > dat$Signature2))] = "S1"
#dat$anno[which((dat$Signature2 >0) & (dat$Signature2 > dat$Signature1))] = "S2"
#dat$anno[which((dat$Signature1 > dat$Signature2))] = "S1"
#dat$anno[which((dat$Signature2 > dat$Signature1))] = "S2"
#dat$anno[which((dat$Signature1 > dat$Signature2) & (dat$Signature1 > as.numeric(quantile(dat$Signature1, 0.9))))] = "S1"
#dat$anno[which((dat$Signature2 > dat$Signature1) & (dat$Signature2 > as.numeric(quantile(dat$Signature2, 0.9))))] = "S2"
#dat$anno[which((dat$Signature1 > as.numeric(quantile(dat$Signature1, 0.9))))] = "S1"
#dat$anno[which((dat$Signature2 > as.numeric(quantile(dat$Signature2, 0.9))))] = "S2"
#dat$anno[which((dat$Signature1 > as.numeric(quantile(dat$Signature1, 0.9))) & (dat$Signature2 > as.numeric(quantile(dat$Signature2, 0.9))))] = "Unassigned"
In [53]:
head(dat)
A data.frame: 6 x 10
cell_idtricyclePositionSignature1Signature2diffSignature1_scaledSignature2_scaledratioiniannoanno
<chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><chr><chr>
APL_AAACCCACAGTTGAAA-1APL_AAACCCACAGTTGAAA-10.5938682 0.36621714-0.355034420.7212515520.56274650.00000000 InfS1Unassigned
APL_AAACCCATCTGCGAGC-1APL_AAACCCATCTGCGAGC-10.6420312 0.28601250 0.290786810.0047743060.50453310.068586137.356197S2Unassigned
APL_AAAGAACTCGAGAAGC-1APL_AAAGAACTCGAGAAGC-10.6490424 0.13330378-0.355034420.4883381940.39369550.00000000 InfS1Unassigned
APL_AAAGGTAGTCTGTGAT-1APL_AAAGGTAGTCTGTGAT-10.7424053 0.11557090 1.179504941.0639340390.38082480.162967872.336809S2Unassigned
APL_AAAGGTATCCTGTAGA-1APL_AAAGGTATCCTGTAGA-10.6932624-0.07080545-0.078563770.0077583170.24555090.029361158.363123S1Unassigned
APL_AAAGTGAGTTTCCATT-1APL_AAAGTGAGTTTCCATT-10.5952930-0.08766026-0.355034420.2673741600.23331750.00000000 InfS1Unassigned
In [54]:
table(dat$anno)
        S1         S2 Unassigned 
       117        117        943 
In [55]:
head(dat)
A data.frame: 6 x 10
cell_idtricyclePositionSignature1Signature2diffSignature1_scaledSignature2_scaledratioiniannoanno
<chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><chr><chr>
APL_AAACCCACAGTTGAAA-1APL_AAACCCACAGTTGAAA-10.5938682 0.36621714-0.355034420.7212515520.56274650.00000000 InfS1Unassigned
APL_AAACCCATCTGCGAGC-1APL_AAACCCATCTGCGAGC-10.6420312 0.28601250 0.290786810.0047743060.50453310.068586137.356197S2Unassigned
APL_AAAGAACTCGAGAAGC-1APL_AAAGAACTCGAGAAGC-10.6490424 0.13330378-0.355034420.4883381940.39369550.00000000 InfS1Unassigned
APL_AAAGGTAGTCTGTGAT-1APL_AAAGGTAGTCTGTGAT-10.7424053 0.11557090 1.179504941.0639340390.38082480.162967872.336809S2Unassigned
APL_AAAGGTATCCTGTAGA-1APL_AAAGGTATCCTGTAGA-10.6932624-0.07080545-0.078563770.0077583170.24555090.029361158.363123S1Unassigned
APL_AAAGTGAGTTTCCATT-1APL_AAAGTGAGTTTCCATT-10.5952930-0.08766026-0.355034420.2673741600.23331750.00000000 InfS1Unassigned
In [56]:
nrow(dat)*0.2
235.4
In [57]:
options(repr.plot.width = 6, repr.plot.height = 6)
hist(dat$tricyclePosition, breaks=100)
In [58]:
dat %>%
  ggplot( aes(x=tricyclePosition, fill=anno)) +
    geom_histogram( color="#e9ecef", alpha=0.6, position = 'identity') +
    scale_fill_manual(values=c("#69b3a2", "#404080","#cccccc")) +
    #theme_ipsum() +
    labs(fill="")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
In [59]:
table(dat$anno)
        S1         S2 Unassigned 
       117        117        943 

Find Markers¶

In [60]:
rc.re <- CreateSeuratObject(rc.sub@assays$RNA@data)
rc.re$upper_lower_anno <- dat$anno
rc.re <- subset(rc.re, cells=colnames(rc.re)[rc.re$upper_lower_anno !="Unassigned"])
Idents(rc.re) <- rc.re$upper_lower_anno
In [61]:
Clust_Markers <- FindAllMarkers(rc.re, only.pos = TRUE)
Calculating cluster S1

Calculating cluster S2

In [62]:
Clust_Markers <- Clust_Markers %>% filter(p_val <0.01) %>% arrange(p_val_adj)
In [63]:
Clust_Markers
A data.frame: 1286 x 7
p_valavg_log2FCpct.1pct.2p_val_adjclustergene
<dbl><dbl><dbl><dbl><dbl><fct><chr>
AT5G056909.510987e-271.01772590.8890.1711.654531e-22S2AT5G05690
AT1G794304.910976e-151.83038190.7520.3338.543135e-11S1AT1G79430
AT1G620452.394913e-132.31246100.4870.0774.166191e-09S1AT1G62045
AT1G780408.201470e-131.03379601.0000.9911.426728e-08S1AT1G78040
AT4G136008.346298e-131.44723780.4360.0431.451922e-08S1AT4G13600
AT3G571501.068495e-120.86291810.8210.4441.858754e-08S2AT3G57150
AT5G406301.317009e-120.58349870.6070.1622.291069e-08S2AT5G40630
AT3G177301.459630e-122.04652670.4020.0262.539172e-08S1AT3G17730
AT2G441202.968049e-120.68662360.9740.9575.163219e-08S2AT2G44120
AT5G111003.882707e-120.66166190.5470.1286.754356e-08S1AT5G11100
AT1G674304.389210e-120.43413351.0001.0007.635470e-08S2AT1G67430
AT5G546604.602154e-121.18396770.5470.1548.005908e-08S1AT5G54660
AT2G270304.698602e-120.99140671.0000.9748.173688e-08S1AT2G27030
AT5G507204.788652e-121.57251790.7090.2748.330339e-08S1AT5G50720
AT3G186706.497293e-120.85840670.3850.0261.130269e-07S1AT3G18670
AT1G699707.611523e-121.89656120.5380.1451.324101e-07S1AT1G69970
AT3G238308.800328e-120.80413250.8460.5131.530905e-07S2AT3G23830
AT3G536201.619160e-111.32548130.8460.7262.816690e-07S1AT3G53620
AT3G153572.592391e-110.60222290.7950.3504.509723e-07S2AT3G15357
AT1G085003.034708e-110.85308490.3930.0435.279178e-07S1AT1G08500
AT4G138503.286909e-110.62084120.9150.6155.717907e-07S2AT4G13850
AT1G543303.567361e-111.46407660.4190.0606.205781e-07S1AT1G54330
AT1G659103.925331e-111.28141740.3760.0346.828505e-07S1AT1G65910
AT2G194603.938409e-111.59502270.6320.2486.851256e-07S1AT2G19460
AT2G183804.339806e-112.37684230.4530.0947.549527e-07S1AT2G18380
AT1G279204.619798e-110.70667760.3500.0178.036600e-07S1AT1G27920
AT1G283954.927537e-110.53472080.8550.4968.571944e-07S2AT1G28395
AT2G414305.176302e-111.08648060.9910.9919.004696e-07S1AT2G41430
AT1G268805.487088e-110.44853851.0001.0009.545338e-07S2AT1G26880
AT2G275106.605192e-110.66332500.9830.8721.149039e-06S2AT2G27510
........................
AT1G685600.00046480520.44272900.3590.1621S2AT1G68560
AT5G464300.00046919340.26956210.9911.0001S2AT5G46430
AT5G155200.00047561620.27208090.9740.9231S2AT5G15520
AT5G353600.00048495520.27829170.9230.6921S2AT5G35360
AT1G305800.00054365060.30748410.9660.8631S2AT1G30580
AT4G387400.00055367370.31128481.0000.9571S2AT4G38740
AT4G261900.00056876620.26709460.9490.7441S2AT4G26190
AT2G314100.00059251790.30050310.8720.8121S2AT2G31410
AT1G697000.00071494040.34306980.7780.5641S2AT1G69700
AT1G540500.00079663670.32507160.4530.2391S2AT1G54050
AT2G011400.00101556770.32165060.9740.8631S2AT2G01140
AT3G063550.00111330540.95010640.9740.9831S2AT3G06355
AT1G073700.00112838790.28793320.7440.4701S2AT1G07370
AT3G544700.00115105330.26984790.9230.7521S2AT3G54470
AT2G287900.00121932910.41194280.9490.8721S2AT2G28790
AT2G280000.00125808710.26037420.7010.4531S2AT2G28000
AT4G392600.00126181680.33565700.9910.9401S2AT4G39260
AT4G348700.00126635960.28043061.0001.0001S2AT4G34870
AT5G473200.00136085320.25779750.9060.7351S2AT5G47320
AT1G282900.00136391650.25912250.4530.2741S2AT1G28290
AT5G200200.00136815680.26258711.0001.0001S2AT5G20020
AT4G011000.00139580820.28100300.9320.7611S2AT4G01100
AT5G103900.00146672150.40300720.6150.3501S2AT5G10390
AT5G522400.00163756250.26568580.8890.6581S2AT5G52240
AT4G398000.00328721680.34326160.7090.5041S2AT4G39800
AT1G429600.00444640390.26525670.9570.8891S2AT1G42960
AT4G090300.00449268630.25818130.9740.9151S2AT4G09030
AT2G295700.00476180690.32175500.7260.5901S2AT2G29570
AT2G231200.00546084060.27171790.8030.5811S2AT2G23120
AT3G469400.00582900140.29154390.6840.4531S2AT3G46940
In [64]:
write.csv(Clust_Markers, "./tradeseq/Phloem_Atlas_G1_Upper_Lower_cells_DE_20240503.csv")

GO of Upper and Lower cell markers¶

In [65]:
## Prepare gene module list
module_sel <- select(Clust_Markers, gene, cluster)
module_list <- split(module_sel, f=module_sel$cluster)
#this makes list from long df of gene lists - TARGET is what we want to keep
module_list  <- lapply(module_list, function(x) x[names(x)=="gene"])
# convert each sublist into character and eliminate duplicates
module_list  <- lapply(module_list, function(x) as.character(unique(x$gene)))
In [66]:
cluster_GO <- gost(module_list, organism = "athaliana", correction_method = "fdr", significant = F, multi_query = F)

cluster_GO_df <- cluster_GO[[1]]

cluster_GO_sig <- filter(cluster_GO_df, p_value<=0.01)

# top  terms for each cluster

cluster_GO_sig %>%
  filter(source=="GO:BP", intersection_size>=4) %>%
  group_by(query) %>%
  top_n(5, wt = -p_value) %>%
  arrange(desc(p_value)) -> top_GO

GO_n <- cluster_GO_sig %>%
  filter(source=="GO:BP", intersection_size>=4) %>%
  group_by(term_id) %>%
  tally() %>%
  arrange(desc(n))


GO_n <- dplyr::rename(GO_n, "n_clusters"=n)

cluster_GO_sig_n <- left_join(cluster_GO_sig, GO_n)

# get all terms for the top ones so that all clusters have values

top_GO_all <- filter(cluster_GO_df, term_id %in% top_GO$term_id)


#spread and plot


top_GO_sel <- select(top_GO_all, query, p_value, term_id, term_name)

spread_GO <- spread(top_GO_sel, key = query, p_value)

spread_GO[is.na(spread_GO)] <- 1

spread_GO_m <- as.matrix(-log10(spread_GO[3:ncol(spread_GO)]))
rownames(spread_GO_m) <- spread_GO$term_name
Joining with `by = join_by(term_id)`
In [67]:
head(cluster_GO_sig)
A data.frame: 6 x 14
querysignificantp_valueterm_sizequery_sizeintersection_sizeprecisionrecallterm_idsourceterm_nameeffective_domain_sizesource_orderparents
<chr><lgl><dbl><int><int><int><dbl><dbl><chr><chr><chr><int><int><list>
1S1TRUE3.958123e-31436603710.11774460.16284404GO:0016192GO:BPvesicle-mediated transport 21899 5229GO:00068....
2S1TRUE2.297663e-23752603810.13432840.10771277GO:0045184GO:BPestablishment of protein localization2189911702GO:00081....
3S1TRUE8.723130e-23976603920.15257050.09426230GO:0051641GO:BPcellular localization 2189914309GO:00099....
4S1TRUE1.713723e-22712603770.12769490.10814607GO:0015031GO:BPprotein transport 21899 4854GO:00451....
5S1TRUE3.336778e-22812603820.13598670.10098522GO:0008104GO:BPprotein localization 21899 3157 GO:0070727
6S1TRUE6.497090e-22841603830.13764510.09869203GO:0070727GO:BPcellular macromolecule localization 2189916772GO:00330....
In [669]:
write.csv(cluster_GO_sig[,-14], "./tradeseq/Phloem_Atlas_G1_Upper_Lower_cells_GO_20240503.csv")
In [68]:
cluster_GO_sig <- read.csv("./tradeseq/Phloem_Atlas_G1_Upper_Lower_cells_GO_20240503_selected.csv")
In [70]:
cluster_GO_sig <- cluster_GO_sig %>% filter(include=="y")
In [71]:
cluster_GO_sig
A data.frame: 10 x 15
Xquerysignificantp_valueterm_sizequery_sizeintersection_sizeprecisionrecallterm_idsourceterm_nameeffective_domain_sizesource_orderinclude
<int><chr><lgl><dbl><int><int><int><dbl><dbl><chr><chr><chr><int><int><chr>
1S1TRUE3.960000e-31 436603 710.1177446100.16284404GO:0016192GO:BPvesicle-mediated transport 21899 5229y
5S1TRUE3.340000e-22 812603 820.1359867330.10098522GO:0008104GO:BPprotein localization 21899 3157y
14S1TRUE1.740000e-11 457603 450.0746268660.09846827GO:0051649GO:BPestablishment of localization in cell2189914317y
47S1TRUE1.636065e-03 421603 270.0447761190.06413302GO:0016049GO:BPcell growth 21899 5141y
48S1TRUE1.656147e-03 693603 380.0630182420.05483405GO:0071554GO:BPcell wall organization or biogenesis 2189917260y
222S2TRUE7.940000e-7214034191520.3627684960.10833927GO:0006412GO:BPtranslation 21899 2194y
227S2TRUE4.120000e-6352704192640.6300715990.05009488GO:0010467GO:BPgene expression 21899 4207y
233S2TRUE3.690000e-5773134193010.7183770880.04115958GO:0009058GO:BPbiosynthetic process 21899 3271y
257S2TRUE2.960000e-08 152419 180.0429594270.11842105GO:0006457GO:BPprotein folding 21899 2227y
260S2TRUE4.470000e-06 4419 40.0095465391.00000000GO:0006858GO:BPextracellular transport 21899 2551y
In [72]:
cluster_GO_sig %>%
  filter(source=="GO:BP", intersection_size>=4) %>%
  group_by(query) %>%
  top_n(10, wt = -p_value) %>%
  arrange(desc(p_value)) -> top_GO

GO_n <- cluster_GO_sig %>%
  filter(source=="GO:BP", intersection_size>=4) %>%
  group_by(term_id) %>%
  tally() %>%
  arrange(desc(n))


GO_n <- dplyr::rename(GO_n, "n_clusters"=n)

cluster_GO_sig_n <- left_join(cluster_GO_sig, GO_n)

# get all terms for the top ones so that all clusters have values

top_GO_all <- filter(cluster_GO_df, term_id %in% top_GO$term_id)


#spread and plot


top_GO_sel <- select(top_GO_all, query, p_value, term_id, term_name)

spread_GO <- spread(top_GO_sel, key = query, p_value)

spread_GO[is.na(spread_GO)] <- 1

spread_GO_m <- as.matrix(-log10(spread_GO[3:ncol(spread_GO)]))
rownames(spread_GO_m) <- spread_GO$term_name
Joining with `by = join_by(term_id)`
In [73]:
spread_GO_m
A matrix: 10 x 2 of type dbl
S1S2
translation 0.00000071.099944
protein folding 3.106984 7.528682
extracellular transport 0.000000 5.350108
protein localization21.476673 0.000000
biosynthetic process 0.00000056.432419
gene expression 0.00000062.385030
cell growth 2.786199 0.000000
vesicle-mediated transport30.402511 0.000000
establishment of localization in cell10.760539 0.000000
cell wall organization or biogenesis 2.780901 0.000000
In [77]:
(spread_GO_m <- spread_GO_m[-2,])
A matrix: 9 x 2 of type dbl
S1S2
translation 0.00000071.099944
extracellular transport 0.000000 5.350108
protein localization21.476673 0.000000
biosynthetic process 0.00000056.432419
gene expression 0.00000062.385030
cell growth 2.786199 0.000000
vesicle-mediated transport30.402511 0.000000
establishment of localization in cell10.760539 0.000000
cell wall organization or biogenesis 2.780901 0.000000
In [78]:
options(repr.plot.width = 9, repr.plot.height = 6)

GO_hm <- Heatmap(spread_GO_m, 
                 name = "-log10_pval", 
                 heatmap_legend_param = list(title_position="topcenter", color_bar = "continuous",title_gp = grid::gpar(fontsize = 12)), 
                 col = colorRamp2(c(0, 5), 
                                  c("beige", "#e31a1c")), 
                 cluster_rows = T,
                 cluster_columns = F, 
                 use_raster= FALSE, 
                 show_column_names = TRUE, 
                 show_row_names = TRUE, 
                 show_row_dend = TRUE, 
                 show_column_dend = TRUE, 
                column_title_gp = grid::gpar(fontsize = 18),
                column_names_gp = grid::gpar(fontsize = 18),
                 clustering_distance_rows = "pearson",
                 clustering_distance_columns = "pearson", 
                 row_names_gp = gpar(fontsize = 18)) 


# padding - bottom, left, top, right
draw(GO_hm, padding = unit(c(1, 10, 1, 100), "mm"), heatmap_legend_side = "left")
In [79]:
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_G1_Upper_Lower_cells_GO_20240503.pdf",width=9, height=6)
draw(GO_hm, padding = unit(c(1, 10, 1, 100), "mm"), heatmap_legend_side = "left")
dev.off()
png: 2

Gene Expression¶

In [670]:
GL3 <- read.csv("./tradeseq/BR_bsn_genes.csv")
In [671]:
GL3
A data.frame: 7 x 2
NameAGI
<chr><chr>
DWF4 AT3G50660
CPD AT5G05690
DET2 AT2G38050
ROT3 AT4G36380
CYP90D1 AT3G13730
BR6OX1 AT5G38970
BR6OX2 AT3G30180
In [672]:
rc.sub <- subset(rc.sub,cells=dat$cell_id)
In [673]:
rc.sub$anno <- dat$anno
In [674]:
# BAS1 catabolic gene
geneid <- 'AT2G26710'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
                   signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))

toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))

# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
  geom_boxplot()
0.668292682926829
In [675]:
options(repr.plot.width=3, repr.plot.height=5)
# Change dotsize and stack ratio
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
geom_boxplot()+theme_classic()+  
geom_dotplot(binaxis='y', stackdir='center',
               stackratio=1, dotsize=0.8)+
    theme(text = element_text(size=20),
        axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
          axis.title.y = element_blank()) + ggtitle("BAS1")
Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
In [676]:
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_G1_Upper_Lower_cells_BAS1_Exp.pdf",width=3, height=4)
print(ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
geom_boxplot()+theme_classic()+  
geom_dotplot(binaxis='y', stackdir='center',
               stackratio=1, dotsize=0.8)+
    theme(text = element_text(size=20),
        axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
          axis.title.y = element_blank()) + ggtitle("BAS1"))
dev.off()
Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
png: 2
In [677]:
# SOB7 catabolic gene
geneid <- 'AT1G17060'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
                   signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))

toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))

# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
  geom_boxplot()
0.0195121951219512
In [678]:
# BEN1 catabolic gene
geneid <- 'AT2G45400'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
                   signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))

toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))

# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
  geom_boxplot()
0.0780487804878049
In [679]:
#OPS
geneid <- 'AT3G09070'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
                   signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))

toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))

# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
  geom_boxplot()
2.61219512195122
In [680]:
options(repr.plot.width=3, repr.plot.height=5)
# Change dotsize and stack ratio
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
geom_boxplot()+theme_classic()+  
geom_dotplot(binaxis='y', stackdir='center',
               stackratio=1, dotsize=0.1)+
    theme(text = element_text(size=20),
        axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
          axis.title.y = element_blank()) + ggtitle("OPS")
Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
In [681]:
#OPL2
geneid <- 'AT2G38070'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
                   signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))

toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))

# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
  geom_boxplot()
0.85609756097561
In [682]:
# CSI1
geneid <- 'AT2G22125'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
                   signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))

toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))

# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
  geom_boxplot()
4.67073170731707
In [683]:
options(repr.plot.width=3, repr.plot.height=5)
# Change dotsize and stack ratio
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
geom_boxplot()+theme_classic()+  
#geom_dotplot(binaxis='y', stackdir='center',
#               stackratio=1, dotsize=0.1)+
    theme(text = element_text(size=20),
        axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
          axis.title.y = element_blank()) + ggtitle("CSI1")
In [684]:
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_G1_Upper_Lower_cells_CSI1_Exp.pdf",width=3, height=4)
print(ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
geom_boxplot()+theme_classic()+  
#geom_dotplot(binaxis='y', stackdir='center',
#              stackratio=1, dotsize=0.8)+
    theme(text = element_text(size=20),
        axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
          axis.title.y = element_blank()) + ggtitle("CSI1"))
dev.off()
png: 2
In [685]:
# CESA6
geneid <- 'AT5G64740'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
                   signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))

toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))

# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
  geom_boxplot()
6.50487804878049
In [686]:
options(repr.plot.width=3, repr.plot.height=5)
# Change dotsize and stack ratio
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
geom_boxplot()+theme_classic()+  
#geom_dotplot(binaxis='y', stackdir='center',
#               stackratio=1, dotsize=0.2)+
    theme(text = element_text(size=20),
        axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
          axis.title.y = element_blank()) + ggtitle("CESA6")
In [687]:
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_G1_Upper_Lower_cells_CESA6_Exp.pdf",width=3, height=4)
print(ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
geom_boxplot()+theme_classic()+  
#geom_dotplot(binaxis='y', stackdir='center',
#               stackratio=1, dotsize=0.8)+
    theme(text = element_text(size=20),
        axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
          axis.title.y = element_blank()) + ggtitle("CESA6"))
dev.off()
png: 2
In [688]:
# DET2
geneid <- 'AT2G38050'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
                   signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))

toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))

# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
  geom_boxplot()
1.3609756097561
In [689]:
# ROT3
geneid <- 'AT4G36380'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
                   signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))

toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))

# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
  geom_boxplot()
0.151219512195122
In [690]:
# CYP90D1
geneid <- 'AT3G13730'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
                   signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))

toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))

# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
  geom_boxplot()
0.0195121951219512
In [691]:
# BR6OX1
geneid <- 'AT5G38970'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
                   signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))

toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))

# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
  geom_boxplot()
0.441463414634146
In [692]:
# BR6OX2
geneid <- 'AT3G30180'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
                   signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))

toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))

# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
  geom_boxplot()
3.02682926829268
In [693]:
#CPD(AT5G05690)
geneid <- 'AT5G05690'
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
                   signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))

toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))

# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
  geom_boxplot()
In [694]:
options(repr.plot.width=3, repr.plot.height=5)
# Change dotsize and stack ratio
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
geom_boxplot()+theme_classic()+  
#geom_dotplot(binaxis='y', stackdir='center',
#               stackratio=1, dotsize=0.1)+
    theme(text = element_text(size=20),
        axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
          axis.title.y = element_blank()) + ggtitle("CPD")
In [695]:
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_G1_Upper_Lower_cells_CPD_Exp.pdf",width=3, height=4)
print(ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
geom_boxplot()+theme_classic()+  
#geom_dotplot(binaxis='y', stackdir='center',
#               stackratio=1, dotsize=0.8)+
    theme(text = element_text(size=20),
        axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
          axis.title.y = element_blank()) + ggtitle("CPD"))
dev.off()
png: 2
In [696]:
#DWF(AT3G50660)
geneid <- 'AT3G50660'
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
                   signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))

toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))

# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
  geom_boxplot()
In [697]:
options(repr.plot.width=3, repr.plot.height=5)
# Change dotsize and stack ratio
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
geom_boxplot()+theme_classic()+  
#geom_dotplot(binaxis='y', stackdir='center',
#               stackratio=1, dotsize=0.8)+
    theme(text = element_text(size=20),
        axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
          axis.title.y = element_blank()) + ggtitle("DWF4")
In [698]:
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_G1_Upper_Lower_cells_DWF4_Exp.pdf",width=3, height=4)
print(ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
geom_boxplot()+theme_classic()+  
#geom_dotplot(binaxis='y', stackdir='center',
#               stackratio=1, dotsize=0.8)+
    theme(text = element_text(size=20),
        axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
          axis.title.y = element_blank()) + ggtitle("DWF4"))
dev.off()
png: 2
In [699]:
#PHB3(AT3G50660)
geneid <- 'AT5G40770'
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
                   signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))

toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))

# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
  geom_boxplot()
In [700]:
options(repr.plot.width=3, repr.plot.height=5)
# Change dotsize and stack ratio
ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
geom_boxplot()+theme_classic()+  
#geom_dotplot(binaxis='y', stackdir='center',
#               stackratio=1, dotsize=0.8)+
    theme(text = element_text(size=20),
        axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
          axis.title.y = element_blank()) + ggtitle("PHB3")
In [701]:
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_G1_Upper_Lower_cells_PHB3_Exp.pdf",width=3, height=4)
print(ggplot(toplt, aes(x=signature_group, y=gene_exp)) + 
geom_boxplot()+theme_classic()+  
#geom_dotplot(binaxis='y', stackdir='center',
#               stackratio=1, dotsize=0.8)+
    theme(text = element_text(size=20),
        axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
          axis.title.y = element_blank()) + ggtitle("PHB3"))
dev.off()
png: 2
In [ ]: